Commit 560fd9c
Parent(s): 0474452
Initial commit
Qbot.py
ADDED
@@ -0,0 +1,184 @@
from langchain_community.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
import gradio as gr
import os  # for reading the API token from the environment

# Ignore unnecessary warnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
warnings.filterwarnings("ignore")

# Initialize the LLM
def get_llm(model_id, temperature, max_new_tokens):
    # Read the Hugging Face API token from the environment instead of
    # hard-coding it (the commit shipped an empty string here)
    huggingface_api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")

    # Initialize the pipeline
    Qbot_llm = HuggingFaceHub(
        repo_id=model_id,
        model_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        huggingfacehub_api_token=huggingface_api_key,
    )
    return Qbot_llm

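# Note (added for context, not in the original commit): newer
# langchain-community releases deprecate HuggingFaceHub in favour of
# HuggingFaceEndpoint; the call above keeps the original API, which forwards
# model_kwargs to the Hub inference endpoint.
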
# Document loader
def document_loader(file):
    # gr.File below is configured with type="filepath", so Gradio passes a
    # plain path string; the original `file.name` would fail on a str
    loader = PyPDFLoader(file)
    loaded_document = loader.load()
    return loaded_document

# Text splitter
def text_splitter(data):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
    )
    chunks = splitter.split_documents(data)
    return chunks

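# Worked example (illustrative, not in the original commit): with
# chunk_size=1000 and chunk_overlap=50, a 2,300-character page splits into
# chunks covering roughly characters 0-1000, 950-1950, and 1900-2300, so each
# neighbouring pair shares about 50 characters of context across the boundary.
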
# Vector store
def vector_database(chunks, embedding_model_name):
    embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)
    vectordb = Chroma.from_documents(chunks, embedding_model)
    return vectordb

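# Note (added for context): Chroma.from_documents with no persist_directory
# builds an in-memory index, so the vector store is rebuilt from scratch on
# every query via retriever() below.
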
# Embedding model (note: unused; vector_database builds its own embeddings)
def huggingface_embeddings(model_name):
    huggingface_embedding = HuggingFaceEmbeddings(model_name=model_name)
    return huggingface_embedding

# Retriever
def retriever(file, embedding_model_name):
    splits = document_loader(file)
    chunks = text_splitter(splits)
    vectordb = vector_database(chunks, embedding_model_name)
    retriever_obj = vectordb.as_retriever()  # avoid shadowing the function name
    return retriever_obj

# Question-answering (RAG) chain
def retriever_qa(file, query, llm_model, temperature, max_new_tokens, embedding_model):
    llm = get_llm(llm_model, temperature, max_new_tokens)
    retriever_obj = retriever(file, embedding_model)
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever_obj,
        return_source_documents=False,
    )
    response = qa.invoke(query)
    return response["result"]

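# Note (added for context): chain_type="stuff" concatenates ("stuffs") all
# retrieved chunks into a single prompt for the LLM; it is the simplest
# RetrievalQA mode but can overflow the model's context window when many or
# long chunks are retrieved.
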
llm_models = [
    "EleutherAI/gpt-neo-2.7B",
    "google/flan-t5-large",
    "google/flan-t5-xl",
]

embedding_models = [
    "sentence-transformers/all-distilroberta-v1",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
]

# CSS for custom styling
custom_css = """
#component-0 {
    max-width: 800px;
    margin: auto;
    padding: 20px;
}
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.gr-button {
    background: linear-gradient(90deg, #4CAF50 0%, #45a049 100%);
    border: none;
    color: white;
}
.gr-button:hover {
    background: linear-gradient(90deg, #45a049 0%, #4CAF50 100%);
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.gr-form {
    background-color: #ffffff;
    border-radius: 12px;
    padding: 20px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-box {
    border-radius: 8px;
    border: 1px solid #e0e0e0;
}
"""

# Gradio interface
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[
        gr.File(label="Upload PDF File", file_count="single", file_types=[".pdf"], type="filepath", elem_classes="gr-box"),  # drag-and-drop file upload
        gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...", elem_classes="gr-box"),
        gr.Dropdown(choices=llm_models, value="EleutherAI/gpt-neo-2.7B", label="LLM Model", elem_classes="gr-box"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="Temperature", elem_classes="gr-box"),
        gr.Slider(minimum=64, maximum=512, value=128, step=32, label="Max Tokens", elem_classes="gr-box"),
        gr.Dropdown(choices=embedding_models, value="sentence-transformers/all-distilroberta-v1", label="Embedding Model", elem_classes="gr-box"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="📚 QBot - Your PDF Assistant",
    description="""
### Welcome to QBot - Your Intelligent PDF Analysis Companion!

Transform any PDF document into an interactive knowledge base. Ask questions naturally and get precise answers powered by advanced language models.

#### Features:
🔍 Intelligent PDF Processing
💡 Multiple Language Models
🎯 Customizable Response Settings
🔄 Various Embedding Options

#### How to Use:
1. **Upload PDF**: Drop your document in the file uploader
2. **Ask Questions**: Type any question about your document
3. **Customize Settings**:
   - Choose your preferred Language Model
   - Adjust Temperature (0-1) for response creativity
   - Set Max Tokens for response length
   - Select Embedding Model for document processing
4. **Get Answers**: Receive AI-powered responses from your document
""",
    article="""
#### Advanced Tips:
📊 **Model Selection**:
- GPT-Neo 2.7B: Best for general-purpose queries
- FLAN-T5 Large: Efficient for straightforward questions
- FLAN-T5 XL: Ideal for complex analysis

🎛️ **Parameter Guide**:
- Temperature: Lower (0.1-0.4) for factual, Higher (0.6-0.9) for creative
- Max Tokens: 128 for brief answers, 256+ for detailed explanations
- Embedding Models: Choose based on document complexity and language

💫 Powered by LangChain and Hugging Face
Made with 🤖 for seamless document interaction
""",
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="gray",
        neutral_hue="gray",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    ),
    css=custom_css,
)

# Launch the app
rag_application.launch(share=True)
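For quick verification outside the UI, the chain can be exercised directly. The sketch below is illustrative and not part of the commit: `sample.pdf` is a hypothetical file, and it assumes `HUGGINGFACEHUB_API_TOKEN` is set in the environment.

# Hypothetical smoke test (not in the commit): call the QA function with the
# same argument order the Gradio interface uses.
answer = retriever_qa(
    "sample.pdf",                                # placeholder path; gr.File(type="filepath") passes a string
    "What is this document about?",
    "google/flan-t5-large",                      # one of the llm_models choices
    0.3,                                         # temperature
    128,                                         # max_new_tokens
    "sentence-transformers/all-MiniLM-L6-v2",    # one of the embedding_models choices
)
print(answer)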
README.md
CHANGED
@@ -1,14 +1,6 @@
 ---
-title:
-
-colorFrom: yellow
-colorTo: purple
+title: Qbot
+app_file: Qbot.py
 sdk: gradio
-sdk_version:
-app_file: app.py
-pinned: false
-license: mit
-short_description: Geadio app
+sdk_version: 4.44.0
 ---
-
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
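Net effect: after this commit the Space metadata reduces to the six lines below, pointing Spaces at Qbot.py in place of the deleted app.py.

---
title: Qbot
app_file: Qbot.py
sdk: gradio
sdk_version: 4.44.0
---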
app.py
DELETED
@@ -1,64 +0,0 @@
import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content

        response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
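The viewer cannot render this change, but the imports in Qbot.py imply roughly the following dependency set (package names inferred from the imports; the exact contents and pins in the commit are unknown):

gradio
langchain
langchain-community
chromadb
sentence-transformers
pypdf
huggingface_hub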