robinroy03 committed
Commit 51a45e9 · verified · 1 Parent(s): 4fe0cf6

Update main.py

Files changed (1):
  main.py +7 -58
main.py CHANGED
@@ -1,61 +1,10 @@
- from fastapi import FastAPI
-
- from langchain_community.llms import LlamaCpp
- from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
- from langchain_core.prompts import PromptTemplate
-
- from langserve import add_routes
- from fastapi.middleware.cors import CORSMiddleware
-
- app = FastAPI(
-     title="LangChain Server",
-     version="1.0",
-     description="A simple api server using Langchain's Runnable interfaces",
- )
-
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=['*'],
-     allow_methods=['*'],
-     allow_headers=['*'],
-     allow_credentials=True
- )
-
-
- template = """Give a very concise one word answer to question.
- Question: {question}
- Answer:
- """
-
- prompt = PromptTemplate.from_template(template)
-
- # Callbacks support token-wise streaming
- callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
- n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
- n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-
- # Make sure the model path is correct for your system!
- llm = LlamaCpp(
-     model_path="Phi-3-mini-4k-instruct-q4.gguf",
-     n_gpu_layers=n_gpu_layers,
-     n_batch=n_batch,
-     callback_manager=callback_manager,
-     verbose=True,  # Verbose is required to pass to the callback manager
- )
-
- add_routes(
-     app,
-     prompt | llm,
-     path='/test'
- )
-
- # if __name__ == "__main__":
- # import uvicorn
-
- # uvicorn.run(app)
-
- # llm_chain = prompt | llm
-
- # question = "Hi"
- # x = llm_chain.invoke({"question": question})
+ from flask import Flask
+ from langchain_community.llms import Ollama
+
+ app = Flask(__name__)
+
+ llm = Ollama(model="phi3")
+
+ @app.route("/<lol>")
+ def test(lol):
+     return llm.invoke(lol)
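
For context, one minimal way to run and exercise the new endpoint (a sketch, not part of the commit; it assumes the file is saved as main.py, that a local Ollama server is running with the phi3 model already pulled, and a hypothetical port of 5000):

# Sketch only, not part of the commit: a run guard so `python main.py` starts the server.
# Assumes `ollama serve` is running locally and `ollama pull phi3` has been done.
if __name__ == "__main__":
    app.run(port=5000)  # hypothetical port

With the app running, a request to http://localhost:5000/hello would call test("hello") and return llm.invoke("hello"), i.e. the raw phi3 completion, as the response body.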