Spaces:
Running
Running
sanbo
commited on
Commit
·
e397647
1
Parent(s):
4921845
update sth. at 2025-01-16 23:44:43
Browse files- app.py1 +146 -0
- demo/README.md +38 -0
- demo/aiohttp_demo.py +49 -0
- demo/cloudscraper_demo.py +28 -0
- demo/general_curl_curlify_demo.py +16 -0
- demo/grequests_demo.py +35 -0
- demo/h11_demo.py +63 -0
- demo/httpcore_demo.py +38 -0
- demo/httpx_demo.py +75 -0
- demo/pycurl_demo.py +46 -0
- demo/requests_demo.py +27 -0
- demo/requirements.txt +14 -0
- demo/treq_demo.py +30 -0
- demo/urllib3_demo.py +33 -0
- demo/urllib_demo.py +32 -0
- requirements.txt +2 -1
app.py1
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import logging
|
3 |
+
import torch
|
4 |
+
import gradio as gr
|
5 |
+
from fastapi import FastAPI, HTTPException
|
6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
7 |
+
from pydantic import BaseModel
|
8 |
+
from typing import List, Dict
|
9 |
+
from functools import lru_cache
|
10 |
+
import numpy as np
|
11 |
+
from threading import Lock
|
12 |
+
import uvicorn
|
13 |
+
|
14 |
+
class EmbeddingRequest(BaseModel):
    """JSON body accepted by the embedding endpoints."""

    # Text to embed (one string per request).
    input: str
    # Model identifier supplied by the client; defaults to the served model.
    model: str = "jinaai/jina-embeddings-v3"
|
17 |
+
|
18 |
+
class EmbeddingResponse(BaseModel):
    """JSON body returned by the embedding endpoints."""

    # Outcome marker; the handler in this file sets it to "success".
    status: str
    # One embedding vector (list of floats) per embedded text.
    embeddings: List[List[float]]
|
21 |
+
|
22 |
+
class EmbeddingService:
    """Load the jina-embeddings-v3 model on CPU and serve cached embeddings.

    The tokenizer and model start as ``None`` and are populated by
    :meth:`initialize`. :meth:`get_embedding` memoises up to 1000 texts per
    instance and serialises model calls with a lock.
    """

    def __init__(self):
        self.model_name = "jinaai/jina-embeddings-v3"
        self.max_length = 512
        self.device = torch.device("cpu")
        self.model = None
        self.tokenizer = None
        self.lock = Lock()
        # Per-instance cache. Decorating the method itself with ``lru_cache``
        # would key on ``self`` and keep every instance alive inside a
        # class-level cache.
        self._cached_embedding = lru_cache(maxsize=1000)(self._compute_embedding)
        self.setup_logging()
        torch.set_num_threads(4)  # CPU optimisation

    def setup_logging(self):
        """Configure root logging at INFO level and create ``self.logger``."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    async def initialize(self):
        """Load the tokenizer and model; do nothing if they are already loaded.

        The method is called both from the ``__main__`` entry point and from
        the application startup hook, so it must be safe to call twice.

        Raises:
            Exception: whatever the ``transformers`` loaders raise; the error
                is logged and re-raised unchanged.
        """
        if self.model is not None and self.tokenizer is not None:
            return
        try:
            from transformers import AutoTokenizer, AutoModel
            tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )
            model = AutoModel.from_pretrained(
                self.model_name,
                trust_remote_code=True
            ).to(self.device)
            model.eval()
            torch.set_grad_enabled(False)
            # Publish both attributes only after everything loaded, so a
            # failed load never leaves the service half-initialised.
            self.tokenizer = tokenizer
            self.model = model
            self.logger.info(f"模型加载成功,使用设备: {self.device}")
        except Exception as e:
            self.logger.error(f"模型初始化失败: {str(e)}")
            raise

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as a list of floats (cached).

        A fresh list is returned on every call, so callers may mutate the
        result without corrupting the cache.

        Raises:
            Exception: any error raised by the tokenizer or the model; it is
                logged and re-raised, and the failure is not cached.
        """
        return list(self._cached_embedding(text))

    def _compute_embedding(self, text: str) -> tuple:
        """Tokenise ``text``, run the model and average the hidden states."""
        with self.lock:
            try:
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    max_length=self.max_length,
                    padding=True
                )

                with torch.no_grad():
                    # Mean over dim 1 (presumably the token axis) gives one
                    # vector per input text.
                    outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
                    # Stored as a tuple so cached values are immutable.
                    return tuple(outputs.numpy().tolist()[0])
            except Exception as e:
                self.logger.error(f"生成嵌入向量失败: {str(e)}")
                raise
|
77 |
+
|
78 |
+
# Single shared service instance used by every endpoint below.
embedding_service = EmbeddingService()
app = FastAPI()

# Public API: any origin may call it. Wildcard origins must not be combined
# with credentialed (cookie / Authorization) requests, so credentials stay
# disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
88 |
+
|
89 |
+
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
@app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
@app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
@app.post("/api/v1/chat/completions", response_model=EmbeddingResponse)
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
async def generate_embeddings(request: EmbeddingRequest):
    """Embed ``request.input`` and wrap the vector in an EmbeddingResponse.

    The same handler is registered under all five POST routes above.

    Raises:
        HTTPException: status 500 carrying the error text when anything in
            the embedding call or the response construction fails.
    """
    try:
        loop = asyncio.get_running_loop()
        # get_embedding is synchronous; hand it to the default executor so
        # the event loop is not blocked while the model runs.
        vector = await loop.run_in_executor(
            None, embedding_service.get_embedding, request.input
        )
        payload = EmbeddingResponse(status="success", embeddings=[vector])
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return payload
|
108 |
+
|
109 |
+
@app.get("/")
async def root():
    """Report a fixed "active" status plus the configured model and device."""
    service = embedding_service
    return dict(
        status="active",
        model=service.model_name,
        device=str(service.device),
    )
|
116 |
+
|
117 |
+
def gradio_interface(text: str) -> Dict:
    """Embed ``text`` for the Gradio UI.

    Returns a dict with ``status`` "success" and the vector under
    ``embeddings``, or ``status`` "error" and the error text under
    ``message``; exceptions are never propagated to the UI.
    """
    try:
        vector = embedding_service.get_embedding(text)
    except Exception as e:
        return {"status": "error", "message": str(e)}
    return {"status": "success", "embeddings": [vector]}
|
129 |
+
|
130 |
+
# Gradio front end: one multi-line text box in, the JSON result dict out.
iface = gr.Interface(
    fn=gradio_interface,
    title="Jina Embeddings V3",
    description="使用jina-embeddings-v3模型生成文本嵌入向量",
    inputs=gr.Textbox(lines=3, label="输入文本"),
    outputs=gr.JSON(label="嵌入向量结果"),
    examples=[["这是一个测试句子。"]],
)
|
138 |
+
|
139 |
+
@app.on_event("startup")
async def startup_event():
    """Load the tokenizer and model when the ASGI application starts."""
    await embedding_service.initialize()


if __name__ == "__main__":
    # The model is loaded by ``startup_event`` once uvicorn starts the app.
    # Loading it here as well made every launch load the weights twice.
    gr.mount_gradio_app(app, iface, path="/ui")
    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
|
demo/README.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 网络请求示例
|
2 |
+
|
3 |
+
简单HTTP请求:requests/httpx
|
4 |
+
异步操作:aiohttp/httpx
|
5 |
+
浏览器自动化:selenium/playwright
|
6 |
+
表单处理:mechanize
|
7 |
+
实时通信:websockets
|
8 |
+
底层控制:socket/pycurl
|
9 |
+
|
10 |
+
## 对比
|
11 |
+
|
12 |
+
| 库名 | 同步/异步 | 易用性(1-5) | 性能(1-5) | 适用场景 | 特点 | 优点 | 缺点 | 额外依赖 | 维护成本(1-5) |
|
13 |
+
| ------------ | --------- | ----------- | --------- | ------------ | ---------------- | --------------------- | ----------- | -------- | ------------- |
|
14 |
+
| requests | 同步 | 5 | 3 | 一般HTTP请求 | 简单直观 | API友好,文档丰富 | 不支持异步 | 是 | 1 |
|
15 |
+
| aiohttp | 异步 | 4 | 5 | 高并发 | 原生异步 | 高性能,WebSocket支持 | 配置较复杂 | 是 | 3 |
|
16 |
+
| httpx | 两者 | 5 | 4 | 现代应用 | 同时支持同步异步 | 类型提示,HTTP/2 | 相对较新 | 是 | 2 |
|
17 |
+
| httpcore | 两者 | 2 | 5 | 底层控制 | 底层实现 | 精细控制,高性能 | API复杂 | 是 | 4 |
|
18 |
+
| h11 | 同步 | 1 | 5 | 协议实现 | 最底层 | 完全控制 | 使用复杂 | 是 | 5 |
|
19 |
+
| urllib3      | 同步      | 3           | 3         | 基础功能     | 连接池管理       | 稳定可靠              | API不够现代 | 是       | 2             |
|
20 |
+
| urllib | 同步 | 2 | 2 | 基础功能 | 标准库 | 无需安装 | API复杂 | 否 | 3 |
|
21 |
+
| pycurl | 同步 | 2 | 5 | 多协议支持 | libcurl绑定 | 高性能,多协议 | 配置复杂 | 是 | 4 |
|
22 |
+
| grequests | 异步 | 4 | 4 | 并发请求 | 并发封装 | 易用的并发 | 功能局限 | 是 | 2 |
|
23 |
+
| cloudscraper | 同步 | 4 | 3 | 反爬虫 | 绕过保护 | 特殊场景支持 | 依赖较多 | 是 | 3 |
|
24 |
+
| treq | 异步 | 3 | 4 | Twisted项目 | 事件驱动 | 集成Twisted | 生态局限 | 是 | 4 |
|
25 |
+
|
26 |
+
补充说明:
|
27 |
+
1. 易用性:1分最难,5分最易用
|
28 |
+
2. 性能:1分最低,5分最高
|
29 |
+
3. 维护成本:1分最低,5分最高
|
30 |
+
4. 同步/异步:标明库的请求方式
|
31 |
+
5. 额外依赖:是否需要安装额外的包
|
32 |
+
|
33 |
+
这个表格可以帮助你:
|
34 |
+
1. 快速选择适合项目的HTTP库
|
35 |
+
2. 评估使用成本
|
36 |
+
3. 了解各个库的优缺点
|
37 |
+
4. 预估维护难度
|
38 |
+
|
demo/aiohttp_demo.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import aiohttp
|
2 |
+
import asyncio
|
3 |
+
import json
|
4 |
+
|
5 |
+
async def embeddings_run_async(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` with aiohttp.

    Returns the decoded JSON body when the status is 200. For any other
    status ``raise_for_status()`` is called; if it does not raise, the
    function returns None.
    """
    request_headers = {'Content-Type': 'application/json'}
    payload = {"input": input, "model": model}

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=request_headers, json=payload) as response:
            if response.status != 200:
                response.raise_for_status()
                return None
            return await response.json()
|
21 |
+
|
22 |
+
# Example: how to call the async helper
async def main():
    """Embed one sample sentence and print the server response."""
    sample = "Your text string goes here"
    response = await embeddings_run_async(sample)
    print(f"---{response}")


# Run the single-request example
if __name__ == "__main__":
    asyncio.run(main())
|
31 |
+
|
32 |
+
# To process several inputs in one go, issue the requests concurrently:
async def batch_process():
    """Embed three sample sentences concurrently and print each response."""
    texts = (
        "First text to process",
        "Second text to process",
        "Third text to process",
    )

    # gather() returns the results in the same order as the inputs.
    responses = await asyncio.gather(*map(embeddings_run_async, texts))

    for position, response in enumerate(responses, start=1):
        print(f"Result {position}: {response}")


# Run the batch example
if __name__ == "__main__":
    asyncio.run(batch_process())
|
demo/cloudscraper_demo.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cloudscraper
|
2 |
+
import json
|
3 |
+
|
4 |
+
|
5 |
+
def embeddings_run(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` through cloudscraper.

    Returns the decoded JSON body for a 200 response and None for any other
    status that ``raise_for_status()`` lets through.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"input": input, "model": model}

    scraper = cloudscraper.create_scraper()
    response = scraper.post(url, headers=headers, data=json.dumps(payload))
    response.encoding = "utf-8"
    response.raise_for_status()

    if response.status_code == 200:
        return response.json()
    # Made explicit: the original fell off the end of the function here.
    return None


if __name__ == "__main__":
    # Named ``input_text`` (as in the sibling demos) so the builtin
    # ``input`` is not shadowed at module level.
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
28 |
+
|
demo/general_curl_curlify_demo.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import curlify
|
3 |
+
|
4 |
+
def get_curl_command(input_text, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings"):
    """Return the curl command equivalent to the embeddings POST request.

    The request is only prepared, never sent, so no network access is needed
    to render the command.
    """
    headers = {"Content-Type": "application/json"}
    data = {
        "input": input_text,
        "model": "jinaai/jina-embeddings-v3"
    }

    # Prepare through a Session so the session's default headers are merged
    # in, as they are on the request object of a real ``requests.post`` call.
    with requests.Session() as session:
        prepared = session.prepare_request(
            requests.Request("POST", url, headers=headers, json=data)
        )
    return curlify.to_curl(prepared)


# Guarded like the other demos so importing this module prints nothing.
if __name__ == "__main__":
    print(get_curl_command("Your text string goes here"))
|
demo/grequests_demo.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import grequests
|
2 |
+
import json
|
3 |
+
|
4 |
+
def embeddings_run(inputs, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """Embed one text or a list of texts concurrently with grequests.

    Args:
        inputs: a single string or a list of strings.
        url: embeddings endpoint.
        model: model identifier sent with every request.

    Returns:
        The decoded JSON body when exactly one text was given, otherwise a
        list of decoded bodies. Responses that failed or were not 200 are
        omitted, so the list may be shorter than ``inputs``.

    Raises:
        RuntimeError: exactly one text was given and its request did not
            produce a 200 response.
    """
    headers = {"Content-Type": "application/json"}

    # Accept a single string as well as a batch.
    if isinstance(inputs, str):
        inputs = [inputs]

    # Build the (not yet sent) request objects.
    pending = [
        grequests.post(url, headers=headers, json={"input": text, "model": model})
        for text in inputs
    ]

    # Send everything concurrently.
    responses = grequests.map(pending)

    results = [
        response.json()
        for response in responses
        if response and response.status_code == 200
    ]

    if len(inputs) == 1:
        if not results:
            # The original indexed results[0] and died with a bare IndexError.
            raise RuntimeError("Embedding request failed or returned a non-200 status")
        return results[0]
    return results


if __name__ == "__main__":
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
demo/h11_demo.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import h11
|
2 |
+
import socket
|
3 |
+
import json
|
4 |
+
import ssl
|
5 |
+
import urllib.parse
|
6 |
+
|
7 |
+
def embeddings_run_h11(input_text, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings"):
    """POST ``input_text`` to ``url`` over TLS, driving HTTP/1.1 with h11.

    Returns the response body decoded as JSON. The HTTP status line is not
    inspected; a non-JSON body surfaces as a ``json`` decoding error.
    """
    parsed_url = urllib.parse.urlparse(url)
    hostname = parsed_url.hostname
    # Honour an explicit port in the URL; default to the HTTPS port.
    port = parsed_url.port or 443
    host_header = hostname if parsed_url.port is None else f"{hostname}:{port}"
    target = parsed_url.path or "/"
    if parsed_url.query:
        target = f"{target}?{parsed_url.query}"

    # Encode once so Content-Length counts bytes, not characters.
    body = json.dumps({
        "input": input_text,
        "model": "jinaai/jina-embeddings-v3"
    }).encode("utf-8")

    conn = h11.Connection(our_role=h11.CLIENT)
    request = h11.Request(
        method="POST",
        target=target,
        headers=[
            ("Host", host_header),
            ("Content-Type", "application/json"),
            ("Content-Length", str(len(body))),
            ("Connection", "close"),
        ]
    )

    context = ssl.create_default_context()
    chunks = []
    # ``with`` closes the sockets even when the exchange raises.
    with socket.create_connection((hostname, port)) as raw_sock:
        with context.wrap_socket(raw_sock, server_hostname=hostname) as sock:
            # sendall: plain send() may transmit only part of the buffer.
            sock.sendall(conn.send(request))
            sock.sendall(conn.send(h11.Data(data=body)))
            sock.sendall(conn.send(h11.EndOfMessage()))

            while True:
                event = conn.next_event()
                if event is h11.NEED_DATA:
                    received = sock.recv(2048)
                    if not received:  # peer closed the connection
                        break
                    conn.receive_data(received)
                    continue
                if isinstance(event, h11.EndOfMessage):
                    break
                if isinstance(event, h11.Data):
                    chunks.append(event.data)

    return json.loads(b"".join(chunks))


if __name__ == "__main__":
    try:
        result = embeddings_run_h11("Your text string goes here")
        print(f"---{result}")
    except Exception as e:
        print(f"Error: {e}")
|
demo/httpcore_demo.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import httpcore
|
2 |
+
import json
|
3 |
+
import asyncio
|
4 |
+
import ssl
|
5 |
+
from urllib.parse import urlparse
|
6 |
+
|
7 |
+
async def embeddings_run_httpcore(input_text, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings"):
    """POST ``input_text`` to ``url`` through an httpcore connection pool.

    Returns the decoded JSON body for a 200 response.

    Raises:
        Exception: the response status was not 200.
    """
    tls = ssl.create_default_context()
    async with httpcore.AsyncConnectionPool(ssl_context=tls) as http:
        body = json.dumps({
            "input": input_text,
            "model": "jinaai/jina-embeddings-v3",
        }).encode()
        request_headers = [
            (b"content-type", b"application/json"),
            (b"accept", b"application/json"),
        ]

        response = await http.request(
            method=b"POST",
            url=url.encode(),
            headers=request_headers,
            content=body,
        )

        if response.status != 200:
            raise Exception(f"Request failed with status {response.status}")
        return json.loads(response.content)
|
29 |
+
|
30 |
+
async def main():
    """Embed one sample sentence; print the result or the error."""
    try:
        outcome = await embeddings_run_httpcore("Your text string goes here")
        print(f"---{outcome}")
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    asyncio.run(main())
|
demo/httpx_demo.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import httpx
|
2 |
+
import asyncio
|
3 |
+
import json
|
4 |
+
from typing import List, Union
|
5 |
+
|
6 |
+
# Synchronous version
def embeddings_run_sync(input: Union[str, List[str]],
                        url: str = "https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings",
                        model: str = "jinaai/jina-embeddings-v3") -> dict:
    """POST ``input`` and ``model`` as JSON with a blocking httpx client.

    Returns the decoded JSON body; ``raise_for_status()`` turns HTTP error
    statuses into exceptions before the body is parsed.
    """
    payload = {"input": input, "model": model}
    request_headers = {'Content-Type': 'application/json'}

    with httpx.Client() as client:
        reply = client.post(url, headers=request_headers, json=payload)
        reply.raise_for_status()
        return reply.json()
|
23 |
+
|
24 |
+
# Asynchronous version
async def embeddings_run_async(input: Union[str, List[str]],
                               url: str = "https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings",
                               model: str = "jinaai/jina-embeddings-v3") -> dict:
    """POST ``input`` and ``model`` as JSON with an async httpx client.

    Returns the decoded JSON body; ``raise_for_status()`` turns HTTP error
    statuses into exceptions before the body is parsed.
    """
    payload = {"input": input, "model": model}
    request_headers = {'Content-Type': 'application/json'}

    async with httpx.AsyncClient() as client:
        reply = await client.post(url, headers=request_headers, json=payload)
        reply.raise_for_status()
        return reply.json()
|
41 |
+
|
42 |
+
# Asynchronous batch helper
async def batch_process_async(texts: List[str]) -> List[dict]:
    """Embed every text in ``texts`` concurrently.

    Returns the decoded responses in the same order as ``texts``; the first
    request that raises makes the whole call raise.
    """
    # embeddings_run_async opens its own client per call, so no client is
    # created here (the original opened an AsyncClient it never used).
    return await asyncio.gather(*(embeddings_run_async(text) for text in texts))
|
50 |
+
|
51 |
+
# Usage examples
if __name__ == "__main__":
    # 1. Synchronous call
    input_text = "Your text string goes here"
    result = embeddings_run_sync(input_text)
    print(f"Sync result: {result}")

    # 2. Asynchronous calls
    async def main():
        # A single async request
        single = await embeddings_run_async(input_text)
        print(f"Async single result: {single}")

        # A concurrent batch of async requests
        texts = [
            "First text to process",
            "Second text to process",
            "Third text to process",
        ]
        batch = await batch_process_async(texts)
        for position, item in enumerate(batch, start=1):
            print(f"Batch result {position}: {item}")

    # Drive the async examples
    asyncio.run(main())
|
demo/pycurl_demo.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pycurl
|
2 |
+
from io import BytesIO
|
3 |
+
import json
|
4 |
+
|
5 |
+
def embeddings_run(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` with pycurl.

    Returns the decoded JSON body for a 200 response.

    Raises:
        Exception: the response status was not 200.
    """
    # Encode once so Content-Length counts bytes, not characters.
    body = json.dumps({
        "input": input,
        "model": model
    }).encode('utf-8')

    # Buffer that receives the response body.
    buffer = BytesIO()
    c = pycurl.Curl()

    # Configuration sits inside the try block so the handle and the buffer
    # are released even if an option cannot be set.
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, buffer)
        c.setopt(c.POST, 1)
        c.setopt(c.POSTFIELDS, body)
        c.setopt(c.HTTPHEADER, [
            'Content-Type: application/json',
            f'Content-Length: {len(body)}'
        ])

        c.perform()

        status_code = c.getinfo(pycurl.HTTP_CODE)
        if status_code != 200:
            raise Exception(f"Request failed with status code: {status_code}")
        return json.loads(buffer.getvalue().decode('utf-8'))
    finally:
        c.close()
        buffer.close()


if __name__ == "__main__":
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
demo/requests_demo.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
|
4 |
+
|
5 |
+
def embeddings_run(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` with requests.

    Returns the decoded JSON body for a 200 response and None for any other
    status that ``raise_for_status()`` lets through.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"input": input, "model": model}

    response = requests.post(url, headers=headers, data=json.dumps(payload))
    response.encoding = "utf-8"
    response.raise_for_status()

    if response.status_code == 200:
        return response.json()
    # Made explicit: the original fell off the end of the function here.
    return None


if __name__ == "__main__":
    # Named ``input_text`` (as in the sibling demos) so the builtin
    # ``input`` is not shadowed at module level.
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
27 |
+
|
demo/requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
requests
|
2 |
+
cloudscraper
|
3 |
+
aiohttp
|
4 |
+
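# asyncio is part of the Python standard library; the PyPI package of that name is an obsolete backport and must not be installed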
|
5 |
+
httpx
|
6 |
+
selenium
|
7 |
+
mechanize
|
8 |
+
grequests
|
9 |
+
pycurl
|
10 |
+
playwright
|
11 |
+
twisted
|
12 |
+
treq
|
13 |
+
h11
|
14 |
+
curlify
|
demo/treq_demo.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from twisted.internet import reactor, defer
|
2 |
+
from treq import post
|
3 |
+
import json
|
4 |
+
|
5 |
+
@defer.inlineCallbacks
def embeddings_run_treq(input_text, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings"):
    """POST ``input_text`` to ``url`` with treq.

    Returns a Deferred that fires with the decoded JSON body. The HTTP
    status is not checked.
    """
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "input": input_text,
        "model": "jinaai/jina-embeddings-v3"
    }

    response = yield post(
        url,
        headers=headers,
        json=data
    )
    content = yield response.json()
    # On Python 3 a plain ``return`` delivers the result of an
    # inlineCallbacks generator; defer.returnValue is deprecated.
    return content
|
22 |
+
|
23 |
+
def main():
    """Run one embedding request under the reactor and print the outcome."""

    def run_request():
        d = embeddings_run_treq("Your text string goes here")
        # Print failures too; previously an error was swallowed and the
        # script exited without any output.
        d.addCallbacks(
            lambda result: print(f"---{result}"),
            lambda failure: print(f"Error: {failure.getErrorMessage()}"),
        )
        d.addBoth(lambda _: reactor.stop())

    # Start the request only once the reactor is running, so reactor.stop()
    # is never called on a reactor that has not started yet.
    reactor.callWhenRunning(run_request)
    reactor.run()


if __name__ == "__main__":
    main()
|
demo/urllib3_demo.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib3
|
2 |
+
import json
|
3 |
+
|
4 |
+
def embeddings_run(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` with urllib3.

    Returns the decoded JSON body for a 200 response.

    Raises:
        urllib3.exceptions.HTTPError: the response status was not 200.
    """
    http = urllib3.PoolManager()

    headers = {
        'Content-Type': 'application/json'
    }

    data = {
        "input": input,
        "model": model
    }

    response = http.request(
        'POST',
        url,
        headers=headers,
        body=json.dumps(data).encode('utf-8')
    )

    if response.status != 200:
        # urllib3 responses have no raise_for_status(); the original call
        # failed with AttributeError instead of reporting the HTTP status.
        raise urllib3.exceptions.HTTPError(
            f"Request failed with status code: {response.status}"
        )
    return json.loads(response.data.decode('utf-8'))


if __name__ == "__main__":
    # Named ``input_text`` (as in the sibling demos) so the builtin
    # ``input`` is not shadowed at module level.
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
demo/urllib_demo.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from urllib.request import Request, urlopen
|
2 |
+
import json
|
3 |
+
|
4 |
+
def embeddings_run(input, url="https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings", model="jinaai/jina-embeddings-v3"):
    """POST ``input`` and ``model`` as JSON to ``url`` with urllib.

    Returns the decoded JSON body for a 200 response and None for any other
    status that ``urlopen`` returns without raising.

    Raises:
        Exception: wraps any error raised while sending the request or
            decoding the response.
    """
    body = json.dumps({"input": input, "model": model}).encode('utf-8')

    # Build the POST request up front; it is only sent by urlopen below.
    request = Request(
        url,
        data=body,
        headers={"Content-Type": "application/json"},
        method='POST',
    )

    try:
        with urlopen(request) as response:
            if response.status != 200:
                return None
            return json.loads(response.read().decode('utf-8'))
    except Exception as e:
        raise Exception(f"Request failed: {str(e)}")


if __name__ == "__main__":
    input_text = "Your text string goes here"
    print(f"---{embeddings_run(input_text)}")
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ gradio
|
|
8 |
numpy
|
9 |
python-multipart
|
10 |
sentencepiece
|
11 |
-
safetensors
|
|
|
|
8 |
numpy
|
9 |
python-multipart
|
10 |
sentencepiece
|
11 |
+
safetensors
|
12 |
+
|