2ch committed on
Commit
a4616e4
·
verified ·
1 Parent(s): 2cf5962

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -21
app.py CHANGED
@@ -3,8 +3,10 @@ from json import dumps, loads
3
  from re import sub
4
  from urllib.parse import urlparse, urlunparse
5
 
 
6
  from fastapi import FastAPI, Request
7
  from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
 
8
  from httpx import AsyncClient, Limits, Timeout
9
 
10
  app = FastAPI(title='PROXI-API')
@@ -70,35 +72,41 @@ def normalize_headers(request: Request, original_url: str) -> dict:
70
 
71
  def decode(chunk: bytes) -> str:
72
  try:
73
- content = chunk.decode('utf-8')
74
  except UnicodeDecodeError:
 
 
 
75
  try:
76
- content = chunk.decode('latin-1')
 
 
 
 
 
 
 
 
 
77
  except UnicodeDecodeError:
78
- try:
79
- content = chunk.decode('iso-8859-1')
80
- except UnicodeDecodeError:
81
- content = chunk.decode(errors='replace')
82
- return content
83
 
84
 
85
- def format_chunk(chunk: bytes, model: str) -> bytes | str | dict:
86
- chunk_id = 'chatcmpl-AQ8Lzxlg8eSCB1lgVmboiXwZiexqE'
87
- timestamp = int(datetime.now().timestamp())
88
-
89
  data = {
90
- "id": chunk_id,
91
- "object": "chat.completion.chunk",
92
- "created": timestamp,
93
- "model": model,
94
- "system_fingerprint": "fp_67802d9a6d",
95
- "choices": [
96
  {
97
- "index": 0,
98
- "delta": {
99
- "content": decode(chunk)
100
  },
101
- "finish_reason": None
102
  }
103
  ]
104
  }
 
3
  from re import sub
4
  from urllib.parse import urlparse, urlunparse
5
 
6
+ from cchardet import detect
7
  from fastapi import FastAPI, Request
8
  from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
9
+ from ftfy import fix_text
10
  from httpx import AsyncClient, Limits, Timeout
11
 
12
  app = FastAPI(title='PROXI-API')
 
72
 
73
  def decode(chunk: bytes) -> str:
74
  try:
75
+ return fix_text(chunk.decode('utf-8'))
76
  except UnicodeDecodeError:
77
+ pass
78
+ detected = detect(chunk)
79
+ if detected['encoding'] and detected['confidence'] > 0.75:
80
  try:
81
+ return fix_text(chunk.decode(detected['encoding'].lower()))
82
+ except (UnicodeDecodeError, LookupError):
83
+ pass
84
+ for encoding in [
85
+ 'cp1251', 'cp1252', 'iso-8859-5', 'iso-8859-1', 'windows-1252',
86
+ 'gb18030', 'big5', 'gb2312', 'shift_jis', 'euc-kr', 'cp1256',
87
+ 'iso-8859-7', 'koi8-r', 'cp866', 'mac_cyrillic'
88
+ ]:
89
+ try:
90
+ return fix_text(chunk.decode(encoding))
91
  except UnicodeDecodeError:
92
+ continue
93
+ return fix_text(chunk.decode('latin-1', errors='replace'))
 
 
 
94
 
95
 
96
+ def format_chunk(chunk: bytes, model: str) -> str:
 
 
 
97
  data = {
98
+ 'id': 'chatcmpl-AQ8Lzxlg8eSCB1lgVmboiXwZiexqE',
99
+ 'object': 'chat.completion.chunk',
100
+ 'created': int(datetime.now().timestamp()),
101
+ 'model': model,
102
+ 'system_fingerprint': 'fp_67802d9a6d',
103
+ 'choices': [
104
  {
105
+ 'index': 0,
106
+ 'delta': {
107
+ 'content': decode(chunk)
108
  },
109
+ 'finish_reason': None
110
  }
111
  ]
112
  }