Manoj Kumar committed on
Commit
f860f0a
·
1 Parent(s): e6f4fec
Files changed (1) hide show
  1. Mark-1/phas1_v2.py +209 -0
Mark-1/phas1_v2.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import spacy
3
+ import re
4
+ from thefuzz import process
5
+ import numpy as np
6
+ from transformers import pipeline
7
+
8
+ # Load intent classification model
9
# Zero-shot classifier used to map a free-text question onto an intent label.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Small English pipeline: only used to tokenise the user's question.
nlp = spacy.load("en_core_web_sm")

# Medium English pipeline: its token vectors drive the column-similarity match.
nlp_vectors = spacy.load("en_core_web_md")
12
+
13
+ # Define operator mappings
14
# Natural-language comparison phrases mapped to their SQL operators.
#
# Order matters for consumers that scan this dict and stop at the first phrase
# found in the text: a phrase that contains another one ("not equal to" vs.
# "equal to") must come first, otherwise the shorter phrase wins and the wrong
# operator is chosen. Each phrase appears exactly once.
operator_mappings = {
    "not equal to": "!=",
    "greater than": ">",
    "less than": "<",
    "more than": ">",
    "equal to": "=",
    "starts with": "LIKE",
    "ends with": "LIKE",
    "contains": "LIKE",
    "above": ">",
    "below": "<",
    "<": "<",
    ">": ">",
}
29
+
30
+ # Connect to SQLite database
31
def connect_to_db(db_path):
    """Open a SQLite connection to ``db_path``.

    Returns the open connection on success. If ``sqlite3`` raises an error
    while connecting, the error is printed and ``None`` is returned instead.
    """
    try:
        connection = sqlite3.connect(db_path)
    except sqlite3.Error as exc:
        print(f"Error connecting to database: {exc}")
        return None
    else:
        return connection
38
+
39
+ # Fetch database schema
40
def fetch_schema(conn):
    """Return the column layout of every table in the database.

    Args:
        conn: An open ``sqlite3`` connection.

    Returns:
        A dict mapping each table name listed in ``sqlite_master`` to a list
        of dicts, one per column, with the keys ``name``, ``type``,
        ``not_null``, ``default`` and ``pk`` taken from ``PRAGMA table_info``.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_names = [row[0] for row in cursor.fetchall()]

    schema = {}
    for table_name in table_names:
        # Quote the identifier so table names containing spaces, keywords or
        # quotes do not break (or alter) the PRAGMA statement.
        quoted_name = '"' + table_name.replace('"', '""') + '"'
        cursor.execute(f"PRAGMA table_info({quoted_name});")
        schema[table_name] = [
            {
                "name": col[1],
                "type": col[2],
                "not_null": col[3],
                "default": col[4],
                "pk": col[5],
            }
            for col in cursor.fetchall()
        ]

    return schema
58
+
59
+ # Match token to schema columns using vector similarity and fuzzy matching
60
def find_best_match(token_text, table_schema):
    """Return the column in ``table_schema`` that best matches ``token_text``.

    Matching is attempted in two stages:

    1. Cosine similarity between the ``nlp_vectors`` vector of the token and of
       each column name; the best column is returned if its score exceeds 0.65.
    2. Otherwise a fuzzy string match via ``process.extractOne``; the match is
       returned if its score exceeds 80.

    Args:
        token_text: The word taken from the user's question.
        table_schema: A sequence of column-name strings to match against.

    Returns:
        The matching column name, or ``None`` when nothing matches well enough
        (including when ``table_schema`` is empty).
    """
    # With no candidates there is nothing to match; extractOne would return
    # None here and the tuple-unpack of its result would raise.
    if not table_schema:
        return None

    token_vec = nlp_vectors(token_text).vector
    token_norm = np.linalg.norm(token_vec)
    best_col = None
    best_score = 0.0

    # A zero-norm vector makes cosine similarity undefined (0/0 -> NaN), so
    # such tokens/columns are skipped and left to the fuzzy fallback.
    if token_norm > 0:
        for col in table_schema:
            col_vec = nlp_vectors(col).vector
            col_norm = np.linalg.norm(col_vec)
            if col_norm == 0:
                continue
            score = token_vec.dot(col_vec) / (token_norm * col_norm)
            if score > best_score:
                best_score = score
                best_col = col

    if best_score > 0.65:
        return best_col

    # Fallback to fuzzy matching if vector similarity fails.
    fuzzy_result = process.extractOne(token_text, table_schema)
    if fuzzy_result is not None and fuzzy_result[1] > 80:
        return fuzzy_result[0]

    return None
82
+
83
+ # Extract conditions from user query
84
def _build_condition(part, table_schema, phrases):
    """Turn one clause of the question into a ``column OP value`` SQL fragment.

    Returns ``None`` when the clause names no known column, contains no known
    operator phrase, or has no value after the operator phrase.
    """
    column = None
    for token in nlp(part):
        column = find_best_match(token.text.lower(), table_schema)
        if column:
            break
    if not column:
        return None

    # Wildcard layout for each LIKE-style phrase.
    like_patterns = {
        "starts with": "{}%",
        "ends with": "%{}",
        "contains": "%{}%",
    }

    lowered = part.lower()
    for phrase in phrases:
        # Search the lowercased text so "Greater Than" is recognised too.
        position = lowered.find(phrase)
        if position == -1:
            continue

        remainder = part[position + len(phrase):].split()
        if not remainder:
            # Operator phrase with nothing after it: no usable value.
            continue

        # Drop sentence punctuation and any quotes the user typed.
        raw_value = remainder[0].strip("?.,;!").strip("'\"")
        if not raw_value:
            continue

        sql_operator = operator_mappings[phrase]
        # Double embedded single quotes so the value cannot terminate the
        # SQL string literal it is placed in.
        escaped = raw_value.replace("'", "''")

        if sql_operator == "LIKE":
            pattern = like_patterns.get(phrase, "{}")
            value = "'" + pattern.format(escaped) + "'"
        elif re.fullmatch(r"-?\d+(?:\.\d+)?", raw_value):
            # Plain numbers are emitted bare so numeric comparison applies.
            value = raw_value
        else:
            value = f"'{escaped}'"

        return f"{column} {sql_operator} {value}"

    return None


def extract_conditions(question, schema, table):
    """Build a SQL ``WHERE`` expression from the conditions in ``question``.

    The question is split on the words "and" / "or". For each clause the first
    token that matches a column of ``table`` (via ``find_best_match``) becomes
    the column, the first operator phrase from ``operator_mappings`` found in
    the clause becomes the operator, and the word following that phrase becomes
    the value. Clauses are joined with the connector the user actually wrote
    in front of them.

    Args:
        question: The user's natural-language question.
        schema: Mapping of table name to a list of column dicts with a
            ``"name"`` key.
        table: Name of the table whose columns should be matched.

    Returns:
        The condition string (without the ``WHERE`` keyword), or ``None`` when
        no condition could be extracted or the table has no known columns.
    """
    table_schema = [col["name"].lower() for col in schema.get(table, [])]
    if not table_schema:
        return None

    # Longest phrases first so "not equal to" is never shadowed by "equal to".
    phrases = sorted(operator_mappings, key=len, reverse=True)

    # The capturing group keeps the connectors in the result:
    # [clause, connector, clause, connector, clause, ...]
    pieces = re.split(r"\b(and|or)\b", question, flags=re.IGNORECASE)

    where_clause = ""
    connector = "AND"
    for index, piece in enumerate(pieces):
        if index % 2 == 1:
            connector = piece.upper()
            continue

        condition = _build_condition(piece.strip(), table_schema, phrases)
        if condition is None:
            continue

        if where_clause:
            where_clause = f"{where_clause} {connector} {condition}"
        else:
            where_clause = condition

    return where_clause or None
128
+
129
+ # Main interpretation and execution
130
+
131
def interpret_question(question, schema):
    """Classify the intent of ``question`` and identify the table it refers to.

    Args:
        question: The user's natural-language question.
        schema: Mapping of table name to column info; its keys are the
            candidate table names.

    Returns:
        A dict ``{"intent": <label>, "table": <table name or None>}``. The
        table is ``None`` when no table name fuzzy-matches the question with a
        score above 80, or when the schema is empty.
    """
    # Intent labels with a human-readable description of each.
    intents = {
        "describe_table": "Provide information about the columns and structure of a table.",
        "list_table_data": "Fetch and display all data stored in a table.",
        "count_records": "Count the number of records in a table.",
        "fetch_column": "Fetch a specific column's data from a table.",
        "fetch_all_data": "Fetch all records from a table without filters.",
        "filter_data_with_conditions": "Fetch records based on specific conditions.",
    }

    labels = list(intents.keys())
    result = classifier(question, labels, multi_label=True)

    # The pipeline returns its labels re-ordered by descending score, so the
    # best score must be looked up in result["labels"], not in the original
    # ``labels`` list (which would always yield the first intent).
    best_index = int(np.argmax(result["scores"]))
    predicted_intent = result["labels"][best_index]

    # Extract table name using schema and fuzzy matching. extractOne returns
    # None when there are no candidates, so guard before unpacking.
    match = process.extractOne(question, list(schema)) if schema else None
    table = match[0] if match is not None and match[1] > 80 else None

    return {"intent": predicted_intent, "table": table}
153
+
154
def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier."""
    return '"' + str(name).replace('"', '""') + '"'


def _find_column(question, schema, table):
    """Return the first column of ``table`` mentioned in ``question``, or None."""
    column_names = [col["name"] for col in schema.get(table, [])]
    if not column_names:
        return None
    for token in nlp(question):
        word = token.text.lower()
        # The table name itself is not a column reference.
        if word == str(table).lower():
            continue
        column = find_best_match(word, column_names)
        if column:
            return column
    return None


def handle_intent(intent_data, schema, conn, question):
    """Execute the action for a classified intent and return its result.

    Args:
        intent_data: Dict with the keys ``"intent"`` and ``"table"``.
        schema: Mapping of table name to a list of column dicts.
        conn: Open ``sqlite3`` connection the queries are run on.
        question: The original question, used to extract columns/conditions.

    Returns:
        - ``describe_table``: the schema entry for the table.
        - ``list_table_data`` / ``fetch_all_data`` / ``fetch_column`` /
          ``filter_data_with_conditions``: a list of row tuples.
        - ``count_records``: a one-element tuple holding the row count.
        - A message string when the table, column or conditions cannot be
          identified, or the intent is not supported.

    Raises:
        sqlite3.Error: If the generated query fails to execute.
    """
    intent = intent_data["intent"]
    table = intent_data["table"]

    if not table:
        return "I couldn't identify which table you're referring to."

    if intent == "describe_table":
        return schema.get(table, "No such table found.")

    # Quote the table name so names with spaces or keywords stay valid SQL.
    quoted_table = _quote_identifier(table)

    if intent in ("list_table_data", "fetch_all_data"):
        # Only "list_table_data" honours filters from the question.
        conditions = (
            extract_conditions(question, schema, table)
            if intent == "list_table_data"
            else None
        )
        query = f"SELECT * FROM {quoted_table}"
        if conditions:
            query += f" WHERE {conditions}"
        cursor = conn.cursor()
        cursor.execute(query)
        return cursor.fetchall()

    if intent == "count_records":
        cursor = conn.cursor()
        cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
        return cursor.fetchone()

    if intent == "fetch_column":
        # Look for a column name in the question; a condition expression
        # ("price > 5") is not a column and must not be selected as one.
        column = _find_column(question, schema, table)
        if not column:
            return "I couldn't identify which column you're referring to."
        cursor = conn.cursor()
        cursor.execute(f"SELECT {_quote_identifier(column)} FROM {quoted_table}")
        return cursor.fetchall()

    if intent == "filter_data_with_conditions":
        conditions = extract_conditions(question, schema, table)
        if not conditions:
            # Without this guard the query would read "WHERE None".
            return "I couldn't identify any filter conditions in your question."
        cursor = conn.cursor()
        cursor.execute(f"SELECT * FROM {quoted_table} WHERE {conditions}")
        return cursor.fetchall()

    return "Unsupported intent."
193
+
194
+ # Entry point
195
+
196
def answer_question(question, conn, schema):
    """Answer ``question`` against the database.

    The question is first interpreted (intent and table), then the resulting
    intent is executed on ``conn``; whatever ``handle_intent`` produces is
    returned unchanged.
    """
    return handle_intent(
        interpret_question(question, schema),
        schema,
        conn,
        question,
    )
199
+
200
def main(db_path="./ecommerce.db"):
    """Run the interactive question loop against the database at ``db_path``.

    Reads questions from standard input until the user types ``exit`` or
    ``quit`` (or sends EOF / Ctrl-C), printing the answer to each one. The
    connection is always closed on the way out.
    """
    conn = connect_to_db(db_path)
    if not conn:
        return

    try:
        schema = fetch_schema(conn)
        while True:
            try:
                question = input("\nAsk a question about the database: ")
            except (EOFError, KeyboardInterrupt):
                # End of input or Ctrl-C: leave the loop without a traceback.
                break

            question = question.strip()
            if not question:
                continue
            if question.lower() in ("exit", "quit"):
                break

            try:
                print(answer_question(question, conn, schema))
            except sqlite3.Error as exc:
                # A bad generated query should not end the whole session.
                print(f"Error running query: {exc}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()