sanbo committed on
Commit
59ea452
·
1 Parent(s): 124ac36

update sth. at 2025-01-16 22:31:26

Browse files
Files changed (3) hide show
  1. README.md +2 -1
  2. app.py +30 -44
  3. requirements.txt +6 -1
README.md CHANGED
@@ -31,4 +31,5 @@ curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
31
  "input": "Your text string goes here",
32
  "model": "jinaai/jina-embeddings-v3"
33
  }'
34
- ```
 
 
31
  "input": "Your text string goes here",
32
  "model": "jinaai/jina-embeddings-v3"
33
  }'
34
+ ```
35
+
app.py CHANGED
@@ -7,8 +7,8 @@ from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
  from typing import List, Dict
9
  from functools import lru_cache
10
- import uvicorn
11
  import numpy as np
 
12
 
13
  class EmbeddingRequest(BaseModel):
14
  input: str
@@ -25,9 +25,9 @@ class EmbeddingService:
25
  self.device = torch.device("cpu")
26
  self.model = None
27
  self.tokenizer = None
 
28
  self.setup_logging()
29
- # CPU优化
30
- torch.set_num_threads(4)
31
 
32
  def setup_logging(self):
33
  logging.basicConfig(
@@ -54,39 +54,28 @@ class EmbeddingService:
54
  self.logger.error(f"模型初始化失败: {str(e)}")
55
  raise
56
 
57
- async def _generate_embedding_internal(self, text: str) -> List[float]:
58
- """内部嵌入生成函数"""
59
- if not text.strip():
60
- raise ValueError("输入文本不能为空")
61
-
62
- inputs = self.tokenizer(
63
- text,
64
- return_tensors="pt",
65
- truncation=True,
66
- max_length=self.max_length,
67
- padding=True
68
- )
69
-
70
- with torch.no_grad():
71
- outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
72
- return outputs.numpy().tolist()[0]
73
-
74
  @lru_cache(maxsize=1000)
75
- def get_cached_embedding(self, text: str) -> List[float]:
76
- """缓存包装函数"""
77
- loop = asyncio.new_event_loop()
78
- try:
79
- return loop.run_until_complete(self._generate_embedding_internal(text))
80
- finally:
81
- loop.close()
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- # 初始化服务
84
  embedding_service = EmbeddingService()
85
- app = FastAPI(
86
- title="Jina Embeddings API",
87
- description="Text embedding generation service using jina-embeddings-v3",
88
- version="1.0.0"
89
- )
90
 
91
  app.add_middleware(
92
  CORSMiddleware,
@@ -103,13 +92,16 @@ app.add_middleware(
103
  @app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
104
  async def generate_embeddings(request: EmbeddingRequest):
105
  try:
106
- embedding = embedding_service.get_cached_embedding(request.input)
 
 
 
 
 
107
  return EmbeddingResponse(
108
  status="success",
109
  embeddings=[embedding]
110
  )
111
- except ValueError as e:
112
- raise HTTPException(status_code=400, detail=str(e))
113
  except Exception as e:
114
  raise HTTPException(status_code=500, detail=str(e))
115
 
@@ -121,10 +113,9 @@ async def root():
121
  "device": str(embedding_service.device)
122
  }
123
 
124
- # Gradio界面
125
  def gradio_interface(text: str) -> Dict:
126
  try:
127
- embedding = embedding_service.get_cached_embedding(text)
128
  return {
129
  "status": "success",
130
  "embeddings": [embedding]
@@ -151,9 +142,4 @@ async def startup_event():
151
  if __name__ == "__main__":
152
  asyncio.run(embedding_service.initialize())
153
  gr.mount_gradio_app(app, iface, path="/ui")
154
- uvicorn.run(
155
- app,
156
- host="0.0.0.0",
157
- port=7860,
158
- workers=1
159
- )
 
7
  from pydantic import BaseModel
8
  from typing import List, Dict
9
  from functools import lru_cache
 
10
  import numpy as np
11
+ from threading import Lock
12
 
13
  class EmbeddingRequest(BaseModel):
14
  input: str
 
25
  self.device = torch.device("cpu")
26
  self.model = None
27
  self.tokenizer = None
28
+ self.lock = Lock()
29
  self.setup_logging()
30
+ torch.set_num_threads(4) # CPU优化
 
31
 
32
  def setup_logging(self):
33
  logging.basicConfig(
 
54
  self.logger.error(f"模型初始化失败: {str(e)}")
55
  raise
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  @lru_cache(maxsize=1000)
58
+ def get_embedding(self, text: str) -> List[float]:
59
+ """同步生成嵌入向量,带缓存"""
60
+ with self.lock:
61
+ try:
62
+ inputs = self.tokenizer(
63
+ text,
64
+ return_tensors="pt",
65
+ truncation=True,
66
+ max_length=self.max_length,
67
+ padding=True
68
+ )
69
+
70
+ with torch.no_grad():
71
+ outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
72
+ return outputs.numpy().tolist()[0]
73
+ except Exception as e:
74
+ self.logger.error(f"生成嵌入向量失败: {str(e)}")
75
+ raise
76
 
 
77
  embedding_service = EmbeddingService()
78
+ app = FastAPI()
 
 
 
 
79
 
80
  app.add_middleware(
81
  CORSMiddleware,
 
92
  @app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
93
  async def generate_embeddings(request: EmbeddingRequest):
94
  try:
95
+ # 使用run_in_executor避免事件循环问题
96
+ embedding = await asyncio.get_running_loop().run_in_executor(
97
+ None,
98
+ embedding_service.get_embedding,
99
+ request.input
100
+ )
101
  return EmbeddingResponse(
102
  status="success",
103
  embeddings=[embedding]
104
  )
 
 
105
  except Exception as e:
106
  raise HTTPException(status_code=500, detail=str(e))
107
 
 
113
  "device": str(embedding_service.device)
114
  }
115
 
 
116
  def gradio_interface(text: str) -> Dict:
117
  try:
118
+ embedding = embedding_service.get_embedding(text)
119
  return {
120
  "status": "success",
121
  "embeddings": [embedding]
 
142
  if __name__ == "__main__":
143
  asyncio.run(embedding_service.initialize())
144
  gr.mount_gradio_app(app, iface, path="/ui")
145
+ uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
 
 
 
 
 
requirements.txt CHANGED
@@ -3,4 +3,9 @@ torch
3
  einops
4
  fastapi
5
  uvicorn
6
- pydantic
 
 
 
 
 
 
3
  einops
4
  fastapi
5
  uvicorn
6
+ pydantic
7
+ gradio
8
+ numpy
9
+ python-multipart
10
+ sentencepiece
11
+ safetensors