Update src/utils.js
Browse files- src/utils.js +94 -108
src/utils.js
CHANGED
@@ -1,125 +1,111 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
const byteLength = bytes.length
|
5 |
-
|
6 |
-
// Calculate lengths and fields similar to Python version
|
7 |
-
const FIXED_HEADER = 2
|
8 |
-
const SEPARATOR = 1
|
9 |
-
const FIXED_SUFFIX_LENGTH = 0xA3 + modelName.length
|
10 |
-
|
11 |
-
// 计算文本长度字段 (类似 Python 中的 base_length1)
|
12 |
-
let textLengthField1, textLengthFieldSize1
|
13 |
-
if (byteLength < 128) {
|
14 |
-
textLengthField1 = byteLength.toString(16).padStart(2, '0')
|
15 |
-
textLengthFieldSize1 = 1
|
16 |
-
} else {
|
17 |
-
const lowByte1 = (byteLength & 0x7F) | 0x80
|
18 |
-
const highByte1 = (byteLength >> 7) & 0xFF
|
19 |
-
textLengthField1 = lowByte1.toString(16).padStart(2, '0') + highByte1.toString(16).padStart(2, '0')
|
20 |
-
textLengthFieldSize1 = 2
|
21 |
-
}
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
|
37 |
-
const
|
38 |
-
|
39 |
-
|
40 |
-
const messageLengthHex = messageTotalLength.toString(16).padStart(10, '0')
|
41 |
-
|
42 |
-
// 构造完整的十六进制字符串
|
43 |
-
const hexString = (
|
44 |
-
messageLengthHex +
|
45 |
-
'12' +
|
46 |
-
textLengthField +
|
47 |
-
'0A' +
|
48 |
-
textLengthField1 +
|
49 |
-
bytes.toString('hex') +
|
50 |
-
'10016A2432343163636435662D393162612D343131382D393239612D3936626330313631626432612' +
|
51 |
-
'2002A132F643A2F6964656150726F2F656475626F73733A1E0A' +
|
52 |
-
// 将模型名称长度转换为两位十六进制,并确保是大写
|
53 |
-
Buffer.from(modelName, 'utf-8').length.toString(16).padStart(2, '0').toUpperCase() +
|
54 |
-
Buffer.from(modelName, 'utf-8').toString('hex').toUpperCase() +
|
55 |
-
'22004A' +
|
56 |
-
'24' + '61383761396133342D323164642D343863372D623434662D616636633365636536663765' +
|
57 |
-
'680070007A2436393337376535612D386332642D343835342D623564392D653062623232336163303061' +
|
58 |
-
'800101B00100C00100E00100E80100'
|
59 |
-
).toUpperCase()
|
60 |
-
return Buffer.from(hexString, 'hex')
|
61 |
}
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
if (!(chunk[0] === 0x00 && chunk[1] === 0x00)) {
|
67 |
-
return ''
|
68 |
-
}
|
69 |
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
86 |
}
|
87 |
|
88 |
-
if (
|
89 |
-
|
90 |
-
i++ // 跳过 0x0C
|
91 |
-
while (i < chunk.length && chunk[i] === 0x0A) {
|
92 |
-
i++ // 跳过所有连续的 0x0A
|
93 |
-
}
|
94 |
-
} else if (
|
95 |
-
i > 0 &&
|
96 |
-
chunk[i] === 0x0A &&
|
97 |
-
chunk[i - 1] >= 0x00 &&
|
98 |
-
chunk[i - 1] <= 0x09
|
99 |
-
) {
|
100 |
-
// 如果当前字节是 0x0A,且前一个字节在 0x00 至 0x09 之间,跳过前一个字节和当前字节
|
101 |
-
filteredChunk.pop() // 移除已添加的前一个字节
|
102 |
-
i++ // 跳过当前的 0x0A
|
103 |
-
} else {
|
104 |
-
filteredChunk.push(chunk[i])
|
105 |
-
i++
|
106 |
}
|
|
|
|
|
|
|
107 |
}
|
|
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
}
|
121 |
|
122 |
module.exports = {
|
123 |
stringToHex,
|
124 |
-
chunkToUtf8String
|
125 |
-
|
|
|
|
1 |
+
const { v4: uuidv4 } = require('uuid');
|
2 |
+
const zlib = require('zlib');
|
3 |
+
const $root = require('./message.js');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
+
// Matches text that contains a <|BEGIN_SYSTEM|>…<|END_SYSTEM|> section followed
// by a <|BEGIN_USER|>…<|END_USER|> section. The `s` flag lets `.` span newlines.
// gunzip() below uses it to discard decompressed payloads that match.
const regex = /<\|BEGIN_SYSTEM\|>.*?<\|END_SYSTEM\|>.*?<\|BEGIN_USER\|>.*?<\|END_USER\|>/s;
|
6 |
+
|
7 |
+
/**
 * Serialises a chat request into a length-prefixed protobuf frame.
 *
 * Every incoming message is shallow-copied, its `role` is replaced by a numeric
 * code (1 for 'user', 2 for anything else) and it receives a fresh UUID as
 * `message_id`. The request object is verified and encoded with
 * `$root.ChatMessage`, then prefixed with a 5-byte big-endian length.
 *
 * @param {Array<object>} messages - Chat messages; each one's `role` is compared against 'user'.
 * @param {string} modelName - Value stored in `model.name` of the request.
 * @returns {Promise<Buffer>} The 5-byte length header followed by the encoded message.
 * @throws {Error} If `$root.ChatMessage.verify` returns an error message
 *   (surfaces as a rejection of the returned promise).
 */
async function stringToHex(messages, modelName) {
  const formattedMessages = messages.map((msg) => ({
    ...msg,
    role: msg.role === 'user' ? 1 : 2,
    message_id: uuidv4(),
  }));

  const message = {
    messages: formattedMessages,
    instructions: {
      instruction: 'Always respond in 中文',
    },
    projectPath: '/path/to/project',
    model: {
      name: modelName,
      empty: '',
    },
    requestId: uuidv4(),
    summary: '',
    conversationId: uuidv4(),
  };

  const errMsg = $root.ChatMessage.verify(message);
  if (errMsg) throw new Error(errMsg);

  const encoded = $root.ChatMessage.encode($root.ChatMessage.create(message)).finish();

  // Build the header from bytes instead of round-tripping through a hex string:
  // `toString('hex')` only produces hex on a Node Buffer, whereas a plain
  // Uint8Array would yield "1,2,3" and corrupt the frame. A 5-byte big-endian
  // integer is exactly what the 10-hex-digit zero-padded length encoded.
  const header = Buffer.alloc(5);
  header.writeUIntBE(encoded.length, 0, 5);

  return Buffer.concat([header, encoded]);
}
|
38 |
|
39 |
+
/**
 * Extracts the text carried by a response chunk.
 *
 * The chunk is read as a sequence of frames, each a 5-byte big-endian length
 * followed by that many bytes, which are decoded with `$root.ResMessage`.
 * The `msg` fields of all complete frames are concatenated. A trailing frame
 * whose header or body is cut short is ignored.
 *
 * If no frame could be decoded, or anything throws while parsing, the chunk is
 * handed to `gunzip()` instead.
 *
 * NOTE(review): the whole 5-byte prefix is treated as the length. If its first
 * byte is actually a flag (as in gRPC-style framing), any non-zero flag makes
 * the length exceed the chunk and stops the loop — confirm this is intended.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer|Array<number>|string} chunk - Raw response data;
 *   anything accepted by `Buffer.from`.
 * @returns {Promise<string>} Concatenated message text, or the `gunzip()` result.
 */
async function chunkToUtf8String(chunk) {
  const HEADER_SIZE = 5;

  try {
    const bytes = Buffer.from(chunk);
    const parts = [];
    let pos = 0;

    // Work on bytes directly; converting to a hex string first doubles the
    // memory and adds nothing.
    while (pos + HEADER_SIZE <= bytes.length) {
      const frameLength = bytes.readUIntBE(pos, HEADER_SIZE);
      const bodyStart = pos + HEADER_SIZE;
      const bodyEnd = bodyStart + frameLength;

      // Incomplete body: stop and keep what has been decoded so far.
      if (bodyEnd > bytes.length) break;

      const decoded = $root.ResMessage.decode(bytes.subarray(bodyStart, bodyEnd));
      parts.push(decoded.msg);
      pos = bodyEnd;
    }

    if (parts.length === 0) {
      return gunzip(chunk);
    }
    return parts.join('');
  } catch (err) {
    // Best effort: an unparsable chunk may still be a gzip-compressed payload.
    return gunzip(chunk);
  }
}
|
70 |
|
71 |
+
/**
 * Decompresses a gzip payload that follows a 5-byte prefix.
 *
 * The first five bytes of `chunk` are skipped and the rest is passed to
 * `zlib.gunzip`. The returned promise resolves with an empty string when
 * decompression fails or when the decompressed text matches `regex`;
 * otherwise it resolves with the decompressed UTF-8 text.
 *
 * @param {Buffer} chunk - Data whose bytes from offset 5 onward are gzip-compressed.
 * @returns {Promise<string>} Decompressed text, or '' as described above.
 */
function gunzip(chunk) {
  return new Promise((resolve) => {
    const onInflated = (error, inflated) => {
      if (error) {
        resolve('');
        return;
      }

      const decoded = inflated.toString('utf-8');
      // This path only tries to surface error data; a payload that contains
      // the complete returned result is ignored.
      resolve(regex.test(decoded) ? '' : decoded);
    };

    zlib.gunzip(chunk.slice(5), onInflated);
  });
}
|
88 |
|
89 |
+
/**
 * Builds a random string by sampling characters from a dictionary.
 *
 * Uses `Math.random()`, which is not cryptographically secure; do not use the
 * result as a secret.
 *
 * @param {object} [options]
 * @param {number} [options.size] - Number of characters to generate. Coerced
 *   with `Number()`, fractional values are rounded down, and negative,
 *   non-finite or missing values produce an empty string.
 * @param {string} [options.dictType] - Built-in dictionary used when
 *   `customDict` is not given: 'alphabet' (a-z, A-Z), 'max' (digits, letters,
 *   `_` and `-`); any other value selects digits only.
 * @param {string} [options.customDict] - Characters to sample from; takes
 *   precedence over `dictType` when non-empty.
 * @returns {string} The generated string.
 */
function getRandomIDPro({ size, dictType, customDict } = {}) {
  let dict = customDict;
  if (!dict) {
    switch (dictType) {
      case 'alphabet':
        dict = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
        break;
      case 'max':
        dict = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-';
        break;
      default:
        dict = '0123456789';
    }
  }

  // Normalise to a non-negative integer count. Counting `size` down to zero
  // never terminates for negative or fractional input.
  const requested = Number(size);
  const length = Number.isFinite(requested) && requested > 0 ? Math.floor(requested) : 0;

  let random = '';
  for (let i = 0; i < length; i += 1) {
    random += dict[Math.floor(Math.random() * dict.length)];
  }
  return random;
}
|
106 |
|
107 |
// Public API of this module.
module.exports = { stringToHex, chunkToUtf8String, getRandomIDPro };
|