Tharindu1527 committed on
Commit
560fd9c
·
1 Parent(s): 0474452

Initial commit

Browse files
Files changed (4) hide show
  1. Qbot.py +184 -0
  2. README.md +3 -11
  3. app.py +0 -64
  4. requirements.txt +0 -0
Qbot.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.llms import HuggingFaceHub
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import Chroma
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain.chains import RetrievalQA
7
+ import gradio as gr
8
+
9
# Suppress library warnings (e.g. LangChain deprecation notices) so they
# don't clutter the console while the demo runs.
#
# NOTE(review): the original also monkey-patched ``warnings.warn`` with a
# no-op function, which silently breaks any code that relies on real
# warning semantics (``catch_warnings(record=True)``,
# ``simplefilter("error")``, ...).  ``filterwarnings("ignore")`` alone
# achieves the intended suppression without that side effect.
import warnings

warnings.filterwarnings("ignore")
15
+
16
# Initialize LLM
def get_llm(model_id, temperature, max_new_tokens):
    """Build a HuggingFaceHub LLM client for the given model repo.

    Args:
        model_id: Hugging Face Hub repo id (e.g. ``"google/flan-t5-large"``).
        temperature: Sampling temperature forwarded via ``model_kwargs``.
        max_new_tokens: Generation length cap forwarded via ``model_kwargs``.

    Returns:
        A configured ``HuggingFaceHub`` LLM instance.
    """
    # SECURITY FIX: read the token from the environment instead of a
    # hard-coded literal.  The original shipped an empty string, which both
    # invites pasting a secret into source control and fails authentication
    # against the Hub API.
    import os
    huggingface_api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")

    # initialize the pipeline
    return HuggingFaceHub(
        repo_id=model_id,
        model_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        huggingfacehub_api_token=huggingface_api_key,
    )
29
+
30
# Document Loader
def document_loader(file):
    """Load a PDF into a list of LangChain documents (one per page).

    Args:
        file: Either a filesystem path string (what ``gr.File`` with
            ``type="filepath"`` passes in current Gradio versions) or a
            file-like/tempfile object exposing a ``.name`` attribute.

    Returns:
        The list of ``Document`` objects produced by ``PyPDFLoader``.
    """
    # BUG FIX: the original unconditionally read ``file.name``; when Gradio
    # hands over a plain ``str`` path that attribute does not exist and the
    # app crashed with AttributeError.  Accept both forms.
    path = file if isinstance(file, str) else file.name
    loader = PyPDFLoader(path)
    return loader.load()
35
+
36
# Define Text Splitter
def text_splitter(data):
    """Split loaded documents into overlapping chunks for embedding.

    Uses ~1000-character chunks with a 50-character overlap so context is
    preserved across chunk boundaries.
    """
    # Named ``splitter`` (not ``text_splitter``) to avoid shadowing this
    # function's own name inside its body.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
    )
    return splitter.split_documents(data)
45
+
46
# Define Vector store
def vector_database(chunks, embedding_model_name):
    """Embed *chunks* with the named sentence-transformer model and index
    them in an in-memory Chroma vector store."""
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    return Chroma.from_documents(chunks, embeddings)
51
+
52
# Define Embedding Model
def huggingface_embeddings(model_name):
    """Return a ``HuggingFaceEmbeddings`` wrapper for *model_name*."""
    return HuggingFaceEmbeddings(model_name=model_name)
57
+
58
# Define Retriever
def retriever(file, embedding_model_name):
    """Build a retriever over the uploaded PDF.

    Pipeline: load the PDF -> split into chunks -> embed & index in Chroma
    -> expose the store's default similarity retriever.
    """
    documents = document_loader(file)
    doc_chunks = text_splitter(documents)
    store = vector_database(doc_chunks, embedding_model_name)
    return store.as_retriever()
65
+
66
# Define a Question Answering Chain
# QA chain
def retriever_qa(file, query, llm_model, temperature, max_new_tokens, embedding_model):
    """Answer *query* from the uploaded PDF file.

    Builds the LLM and retriever on every call (stateless per request),
    runs a RetrievalQA "stuff" chain, and returns only the answer text.
    """
    language_model = get_llm(llm_model, temperature, max_new_tokens)
    doc_retriever = retriever(file, embedding_model)
    qa_chain = RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",
        retriever=doc_retriever,
        return_source_documents=False,
    )
    result = qa_chain.invoke(query)
    return result["result"]
78
+
79
# LLM repos offered in the UI dropdown; the first entry is also the
# dropdown's default value in the Interface below, so keep it first.
llm_models = [
    "EleutherAI/gpt-neo-2.7B",
    "google/flan-t5-large",
    "google/flan-t5-xl",
]

# Sentence-transformer models used to embed PDF chunks; first entry is the
# default selection in the Interface below.
embedding_models = [
    "sentence-transformers/all-distilroberta-v1",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
]
90
+
91
# CSS for custom styling
# Passed to gr.Interface(css=...) below.  Constrains the root component
# width, applies a green gradient + hover lift to buttons, and rounds the
# form/box containers (the ``gr-box`` class is attached to each input via
# ``elem_classes`` in the Interface definition).
custom_css = """
#component-0 {
    max-width: 800px;
    margin: auto;
    padding: 20px;
}
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.gr-button {
    background: linear-gradient(90deg, #4CAF50 0%, #45a049 100%);
    border: none;
    color: white;
}
.gr-button:hover {
    background: linear-gradient(90deg, #45a049 0%, #4CAF50 100%);
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.gr-form {
    background-color: #ffffff;
    border-radius: 12px;
    padding: 20px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-box {
    border-radius: 8px;
    border: 1px solid #e0e0e0;
}
"""
122
+
123
#Gradio Interface
# Single-page UI.  The six inputs below are passed POSITIONALLY to
# retriever_qa(file, query, llm_model, temperature, max_new_tokens,
# embedding_model) — keep their order in sync with that signature.
rag_application = gr.Interface(
    fn = retriever_qa,
    allow_flagging = "never",  # hide Gradio's built-in flagging button
    inputs=[
        gr.File(label="Upload PDF File", file_count="single", file_types=[".pdf"],type="filepath", elem_classes="gr-box"), # Drag and drop file upload
        gr.Textbox(label = "Input Query", lines=2, placeholder="Type your question here...", elem_classes="gr-box"),
        # Defaults below must be members of the module-level choice lists.
        gr.Dropdown(choices=llm_models, value="EleutherAI/gpt-neo-2.7B", label="LLM Model", elem_classes="gr-box"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="Temperature", elem_classes="gr-box"),
        gr.Slider(minimum=64, maximum=512, value=128, step=32, label="Max Tokens", elem_classes="gr-box"),
        gr.Dropdown(choices=embedding_models, value="sentence-transformers/all-distilroberta-v1", label="Embedding Model", elem_classes="gr-box")
    ],
    outputs=gr.Textbox(label="Output"),
    title = "📚 QBot - Your PDF Assistant",
    # Markdown rendered above the form.
    description="""
    ### Welcome to QBot - Your Intelligent PDF Analysis Companion!

    Transform any PDF document into an interactive knowledge base. Ask questions naturally and get precise answers powered by advanced language models.

    #### Features:
    🔍 Intelligent PDF Processing
    💡 Multiple Language Models
    🎯 Customizable Response Settings
    🔄 Various Embedding Options

    #### How to Use:
    1. **Upload PDF**: Drop your document in the file uploader
    2. **Ask Questions**: Type any question about your document
    3. **Customize Settings**:
       - Choose your preferred Language Model
       - Adjust Temperature (0-1) for response creativity
       - Set Max Tokens for response length
       - Select Embedding Model for document processing
    4. **Get Answers**: Receive AI-powered responses from your document
    """,
    # Markdown rendered below the form.
    article="""
    #### Advanced Tips:
    📊 **Model Selection**:
    - GPT-Neo 2.7B: Best for general-purpose queries
    - FLAN-T5 Large: Efficient for straightforward questions
    - FLAN-T5 XL: Ideal for complex analysis

    🎛️ **Parameter Guide**:
    - Temperature: Lower (0.1-0.4) for factual, Higher (0.6-0.9) for creative
    - Max Tokens: 128 for brief answers, 256+ for detailed explanations
    - Embedding Models: Choose based on document complexity and language

    💫 Powered by LangChain and Hugging Face
    Made with 🤖 for seamless document interaction
    """,
    # Soft green theme to match the button gradient in custom_css above.
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="gray",
        neutral_hue="gray",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
    ),
    css=custom_css
)
182
+
183
#Launch app
# NOTE(review): share=True requests a public tunnel link when run locally;
# on Hugging Face Spaces it is presumably redundant (the Space URL is
# already public) — confirm against the deployment target.
rag_application.launch(share=True)
README.md CHANGED
@@ -1,14 +1,6 @@
1
  ---
2
- title: Gradio Space
3
- emoji: 💬
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.0.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Geadio app
12
  ---
13
-
14
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
1
  ---
2
+ title: Qbot
3
+ app_file: Qbot.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.44.0
 
 
 
 
6
  ---
 
 
app.py DELETED
@@ -1,64 +0,0 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
-
62
-
63
- if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ