Spaces:
Running
Running
sanbo
committed on
Commit
·
59ea452
1
Parent(s):
124ac36
update sth. at 2025-01-16 22:31:26
Browse files
- README.md +2 -1
- app.py +30 -44
- requirements.txt +6 -1
README.md
CHANGED
@@ -31,4 +31,5 @@ curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
|
|
31 |
"input": "Your text string goes here",
|
32 |
"model": "jinaai/jina-embeddings-v3"
|
33 |
}'
|
34 |
-
```
|
|
|
|
31 |
"input": "Your text string goes here",
|
32 |
"model": "jinaai/jina-embeddings-v3"
|
33 |
}'
|
34 |
+
```
|
35 |
+
|
app.py
CHANGED
@@ -7,8 +7,8 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
7 |
from pydantic import BaseModel
|
8 |
from typing import List, Dict
|
9 |
from functools import lru_cache
|
10 |
-
import uvicorn
|
11 |
import numpy as np
|
|
|
12 |
|
13 |
class EmbeddingRequest(BaseModel):
|
14 |
input: str
|
@@ -25,9 +25,9 @@ class EmbeddingService:
|
|
25 |
self.device = torch.device("cpu")
|
26 |
self.model = None
|
27 |
self.tokenizer = None
|
|
|
28 |
self.setup_logging()
|
29 |
-
# CPU优化
|
30 |
-
torch.set_num_threads(4)
|
31 |
|
32 |
def setup_logging(self):
|
33 |
logging.basicConfig(
|
@@ -54,39 +54,28 @@ class EmbeddingService:
|
|
54 |
self.logger.error(f"模型初始化失败: {str(e)}")
|
55 |
raise
|
56 |
|
57 |
-
async def _generate_embedding_internal(self, text: str) -> List[float]:
|
58 |
-
"""内部嵌入生成函数"""
|
59 |
-
if not text.strip():
|
60 |
-
raise ValueError("输入文本不能为空")
|
61 |
-
|
62 |
-
inputs = self.tokenizer(
|
63 |
-
text,
|
64 |
-
return_tensors="pt",
|
65 |
-
truncation=True,
|
66 |
-
max_length=self.max_length,
|
67 |
-
padding=True
|
68 |
-
)
|
69 |
-
|
70 |
-
with torch.no_grad():
|
71 |
-
outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
|
72 |
-
return outputs.numpy().tolist()[0]
|
73 |
-
|
74 |
@lru_cache(maxsize=1000)
|
75 |
-
def
|
76 |
-
"""
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
# 初始化服务
|
84 |
embedding_service = EmbeddingService()
|
85 |
-
app = FastAPI(
|
86 |
-
title="Jina Embeddings API",
|
87 |
-
description="Text embedding generation service using jina-embeddings-v3",
|
88 |
-
version="1.0.0"
|
89 |
-
)
|
90 |
|
91 |
app.add_middleware(
|
92 |
CORSMiddleware,
|
@@ -103,13 +92,16 @@ app.add_middleware(
|
|
103 |
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
|
104 |
async def generate_embeddings(request: EmbeddingRequest):
|
105 |
try:
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
107 |
return EmbeddingResponse(
|
108 |
status="success",
|
109 |
embeddings=[embedding]
|
110 |
)
|
111 |
-
except ValueError as e:
|
112 |
-
raise HTTPException(status_code=400, detail=str(e))
|
113 |
except Exception as e:
|
114 |
raise HTTPException(status_code=500, detail=str(e))
|
115 |
|
@@ -121,10 +113,9 @@ async def root():
|
|
121 |
"device": str(embedding_service.device)
|
122 |
}
|
123 |
|
124 |
-
# Gradio界面
|
125 |
def gradio_interface(text: str) -> Dict:
|
126 |
try:
|
127 |
-
embedding = embedding_service.
|
128 |
return {
|
129 |
"status": "success",
|
130 |
"embeddings": [embedding]
|
@@ -151,9 +142,4 @@ async def startup_event():
|
|
151 |
if __name__ == "__main__":
|
152 |
asyncio.run(embedding_service.initialize())
|
153 |
gr.mount_gradio_app(app, iface, path="/ui")
|
154 |
-
uvicorn.run(
|
155 |
-
app,
|
156 |
-
host="0.0.0.0",
|
157 |
-
port=7860,
|
158 |
-
workers=1
|
159 |
-
)
|
|
|
7 |
from pydantic import BaseModel
|
8 |
from typing import List, Dict
|
9 |
from functools import lru_cache
|
|
|
10 |
import numpy as np
|
11 |
+
from threading import Lock
|
12 |
|
13 |
class EmbeddingRequest(BaseModel):
|
14 |
input: str
|
|
|
25 |
self.device = torch.device("cpu")
|
26 |
self.model = None
|
27 |
self.tokenizer = None
|
28 |
+
self.lock = Lock()
|
29 |
self.setup_logging()
|
30 |
+
torch.set_num_threads(4) # CPU优化
|
|
|
31 |
|
32 |
def setup_logging(self):
|
33 |
logging.basicConfig(
|
|
|
54 |
self.logger.error(f"模型初始化失败: {str(e)}")
|
55 |
raise
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
@lru_cache(maxsize=1000)
|
58 |
+
def get_embedding(self, text: str) -> List[float]:
|
59 |
+
"""同步生成嵌入向量,带缓存"""
|
60 |
+
with self.lock:
|
61 |
+
try:
|
62 |
+
inputs = self.tokenizer(
|
63 |
+
text,
|
64 |
+
return_tensors="pt",
|
65 |
+
truncation=True,
|
66 |
+
max_length=self.max_length,
|
67 |
+
padding=True
|
68 |
+
)
|
69 |
+
|
70 |
+
with torch.no_grad():
|
71 |
+
outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
|
72 |
+
return outputs.numpy().tolist()[0]
|
73 |
+
except Exception as e:
|
74 |
+
self.logger.error(f"生成嵌入向量失败: {str(e)}")
|
75 |
+
raise
|
76 |
|
|
|
77 |
embedding_service = EmbeddingService()
|
78 |
+
app = FastAPI()
|
|
|
|
|
|
|
|
|
79 |
|
80 |
app.add_middleware(
|
81 |
CORSMiddleware,
|
|
|
92 |
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
|
93 |
async def generate_embeddings(request: EmbeddingRequest):
|
94 |
try:
|
95 |
+
# 使用run_in_executor避免事件循环问题
|
96 |
+
embedding = await asyncio.get_running_loop().run_in_executor(
|
97 |
+
None,
|
98 |
+
embedding_service.get_embedding,
|
99 |
+
request.input
|
100 |
+
)
|
101 |
return EmbeddingResponse(
|
102 |
status="success",
|
103 |
embeddings=[embedding]
|
104 |
)
|
|
|
|
|
105 |
except Exception as e:
|
106 |
raise HTTPException(status_code=500, detail=str(e))
|
107 |
|
|
|
113 |
"device": str(embedding_service.device)
|
114 |
}
|
115 |
|
|
|
116 |
def gradio_interface(text: str) -> Dict:
|
117 |
try:
|
118 |
+
embedding = embedding_service.get_embedding(text)
|
119 |
return {
|
120 |
"status": "success",
|
121 |
"embeddings": [embedding]
|
|
|
142 |
if __name__ == "__main__":
|
143 |
asyncio.run(embedding_service.initialize())
|
144 |
gr.mount_gradio_app(app, iface, path="/ui")
|
145 |
+
uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -3,4 +3,9 @@ torch
|
|
3 |
einops
|
4 |
fastapi
|
5 |
uvicorn
|
6 |
-
pydantic
|
|
|
|
|
|
|
|
|
|
|
|
3 |
einops
|
4 |
fastapi
|
5 |
uvicorn
|
6 |
+
pydantic
|
7 |
+
gradio
|
8 |
+
numpy
|
9 |
+
python-multipart
|
10 |
+
sentencepiece
|
11 |
+
safetensors
|