Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -1,61 +1,10 @@
|
|
1 |
-
from
|
|
|
2 |
|
3 |
-
|
4 |
-
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
|
5 |
-
from langchain_core.prompts import PromptTemplate
|
6 |
|
7 |
-
|
8 |
-
from fastapi.middleware.cors import CORSMiddleware
|
9 |
|
10 |
-
app
|
11 |
-
|
12 |
-
|
13 |
-
description="A simple api server using Langchain's Runnable interfaces",
|
14 |
-
)
|
15 |
-
|
16 |
-
app.add_middleware(
|
17 |
-
CORSMiddleware,
|
18 |
-
allow_origins=['*'],
|
19 |
-
allow_methods=['*'],
|
20 |
-
allow_headers=['*'],
|
21 |
-
allow_credentials=True
|
22 |
-
)
|
23 |
-
|
24 |
-
|
25 |
-
template = """Give a very concise one word answer to question.
|
26 |
-
Question: {question}
|
27 |
-
Answer:
|
28 |
-
"""
|
29 |
-
|
30 |
-
prompt = PromptTemplate.from_template(template)
|
31 |
-
|
32 |
-
# Callbacks support token-wise streaming
|
33 |
-
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
34 |
-
|
35 |
-
n_gpu_layers = -1 # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
|
36 |
-
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
37 |
-
|
38 |
-
# Make sure the model path is correct for your system!
|
39 |
-
llm = LlamaCpp(
|
40 |
-
model_path="Phi-3-mini-4k-instruct-q4.gguf",
|
41 |
-
n_gpu_layers=n_gpu_layers,
|
42 |
-
n_batch=n_batch,
|
43 |
-
callback_manager=callback_manager,
|
44 |
-
verbose=True, # Verbose is required to pass to the callback manager
|
45 |
-
)
|
46 |
-
|
47 |
-
add_routes(
|
48 |
-
app,
|
49 |
-
prompt | llm,
|
50 |
-
path='/test'
|
51 |
-
)
|
52 |
-
|
53 |
-
# if __name__ == "__main__":
|
54 |
-
# import uvicorn
|
55 |
-
|
56 |
-
# uvicorn.run(app)
|
57 |
-
|
58 |
-
# llm_chain = prompt | llm
|
59 |
-
|
60 |
-
# question = "Hi"
|
61 |
-
# x = llm_chain.invoke({"question": question})
|
|
|
1 |
+
from flask import Flask
|
2 |
+
from langchain_community.llms import Ollama
|
3 |
|
4 |
+
app = Flask(__name__)
|
|
|
|
|
5 |
|
6 |
+
llm = Ollama(model="phi3")
|
|
|
7 |
|
8 |
+
@app.route("/<lol>")
|
9 |
+
def test(lol):
|
10 |
+
return llm.invoke(lol)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|