JIMMYGGG committed on
Commit
f74e90b
·
verified ·
1 Parent(s): 59cc3b4

Update src/utils.js

Browse files
Files changed (1) hide show
  1. src/utils.js +94 -108
src/utils.js CHANGED
@@ -1,125 +1,111 @@
1
/**
 * Builds the binary request frame for a single chat text.
 *
 * The frame is a 5-byte big-endian length prefix followed by a payload laid
 * out as tag / length / value records (the constants below follow the
 * protobuf wire encoding: 0x12, 0x0A, 0x6A ... are field tags, lengths are
 * base-128 varints). Every length is computed from the bytes actually
 * written, so the frame stays self-consistent for:
 *   - text of 128 bytes or more (the text's own length prefix grows),
 *   - model names of any byte length (previously only 26 bytes matched the
 *     hard-coded 0x1E sub-message length),
 *   - non-ASCII model names (byte length, not UTF-16 length, is used).
 *
 * @param {string} str - Message text; encoded as UTF-8.
 * @param {string} modelName - Model identifier; encoded as UTF-8.
 * @returns {Buffer} Length prefix plus payload.
 */
function stringToHex (str, modelName) {
  // Base-128 varint: 7 payload bits per byte, high bit set on all but the last byte.
  const encodeVarint = (value) => {
    const out = []
    let rest = value
    while (rest >= 0x80) {
      out.push((rest % 0x80) | 0x80)
      rest = Math.floor(rest / 0x80)
    }
    out.push(rest)
    return Buffer.from(out)
  }
  const fromHex = (hex) => Buffer.from(hex, 'hex')

  const textBytes = Buffer.from(str, 'utf-8')
  const modelBytes = Buffer.from(modelName, 'utf-8')

  // Nested record carried under tag 0x12: the text, a constant 0x10 0x01 pair
  // and a fixed 36-character identifier under tag 0x6A.
  const messageBody = Buffer.concat([
    fromHex('0A'), encodeVarint(textBytes.length), textBytes,
    fromHex('1001'),
    fromHex('6A24' + '32343163636435662D393162612D343131382D393239612D393662633031363162643261')
  ])

  // Nested record carried under tag 0x3A: the model name followed by an empty field (0x22 0x00).
  const modelBody = Buffer.concat([
    fromHex('0A'), encodeVarint(modelBytes.length), modelBytes,
    fromHex('2200')
  ])

  const payload = Buffer.concat([
    fromHex('12'), encodeVarint(messageBody.length), messageBody,
    fromHex('2200'),
    // 0x13-byte path string "/d:/ideaPro/eduboss"
    fromHex('2A13' + '2F643A2F6964656150726F2F656475626F7373'),
    fromHex('3A'), encodeVarint(modelBody.length), modelBody,
    fromHex('4A24' + '61383761396133342D323164642D343863372D623434662D616636633365636536663765'),
    fromHex('68007000'),
    fromHex('7A24' + '36393337376535612D386332642D343835342D623564392D653062623232336163303061'),
    fromHex('800101B00100C00100E00100E80100')
  ])

  // 5-byte prefix holding the payload length (same value the original wrote as 10 hex digits).
  const header = Buffer.alloc(5)
  header.writeUIntBE(payload.length, 0, 5)
  return Buffer.concat([header, payload])
}
62
 
63
/**
 * Extracts the readable UTF-8 text from one raw response chunk.
 *
 * Chunks whose first two bytes are not both 0x00 are ignored (decoding them
 * produced garbage text). For accepted chunks, everything up to and including
 * the first 0x0A byte is dropped and the remainder is filtered byte by byte:
 *   - a run of four 0x00 bytes is skipped together with every following byte
 *     in the range 0x00-0x0F (a window shorter than four bytes at the very end
 *     of the chunk counts as a run when all of its bytes are 0x00);
 *   - 0x0C is skipped together with any 0x0A bytes directly after it;
 *   - a 0x0A preceded by a byte in 0x00-0x09 is skipped along with that byte;
 *   - finally every remaining byte below 0x0A, and 0x0C, is removed.
 *
 * The function no longer prints each chunk to the console.
 *
 * @param {Buffer|Uint8Array|number[]} chunk - Raw bytes of one response chunk.
 * @returns {string} Decoded text, or '' when the chunk is not a text frame.
 */
function chunkToUtf8String (chunk) {
  if (!(chunk[0] === 0x00 && chunk[1] === 0x00)) {
    return ''
  }

  const payload = Buffer.from(chunk)
  // Drop the first 0x0A and everything before it (indexOf === -1 keeps the whole chunk).
  const body = payload.subarray(payload.indexOf(0x0A) + 1)

  const kept = []
  let i = 0
  while (i < body.length) {
    if (body.subarray(i, i + 4).every((byte) => byte === 0x00)) {
      i += 4 // skip the zero run itself
      while (i < body.length && body[i] <= 0x0F) {
        i++ // skip the low-valued bytes that trail the zero run
      }
      continue
    }

    if (body[i] === 0x0C) {
      i++
      while (i < body.length && body[i] === 0x0A) {
        i++
      }
    } else if (i > 0 && body[i] === 0x0A && body[i - 1] <= 0x09) {
      kept.pop() // the preceding low byte was already collected; remove it
      i++
    } else {
      kept.push(body[i])
      i++
    }
  }

  // Remaining control bytes below 0x0A and any 0x0C are not part of the text.
  const textBytes = kept.filter((byte) => byte >= 0x0A && byte !== 0x0C)
  return Buffer.from(textBytes).toString('utf-8')
}
121
 
122
  module.exports = {
123
  stringToHex,
124
- chunkToUtf8String
125
- }
 
 
1
+ const { v4: uuidv4 } = require('uuid');
2
+ const zlib = require('zlib');
3
+ const $root = require('./message.js');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ const regex = /<\|BEGIN_SYSTEM\|>.*?<\|END_SYSTEM\|>.*?<\|BEGIN_USER\|>.*?<\|END_USER\|>/s;
6
+
7
/**
 * Encodes a conversation as a `ChatMessage` and wraps it in a length-prefixed frame.
 *
 * Each input message is copied, its `role` is mapped to 1 for 'user' and 2 for
 * anything else, and it receives a fresh `message_id`. The request also gets
 * fresh `requestId` and `conversationId` values. The encoded bytes are
 * prefixed with their length written as a 5-byte big-endian integer.
 *
 * The frame is assembled from bytes directly, so it is correct whether
 * `finish()` returns a Buffer or a plain Uint8Array.
 *
 * @param {Array<object>} messages - Chat messages; `role` is read from each entry.
 * @param {string} modelName - Value stored in `model.name`.
 * @returns {Promise<Buffer>} The 5-byte length prefix followed by the encoded message.
 * @throws {Error} When `ChatMessage.verify` reports a problem with the message.
 */
async function stringToHex(messages, modelName) {
  const formattedMessages = messages.map((msg) => ({
    ...msg,
    role: msg.role === 'user' ? 1 : 2,
    message_id: uuidv4(),
  }));

  const message = {
    messages: formattedMessages,
    instructions: {
      instruction: 'Always respond in 中文',
    },
    projectPath: '/path/to/project',
    model: {
      name: modelName,
      empty: '',
    },
    requestId: uuidv4(),
    summary: '',
    conversationId: uuidv4(),
  };

  const errMsg = $root.ChatMessage.verify(message);
  if (errMsg) throw new Error(errMsg);

  const messageInstance = $root.ChatMessage.create(message);
  const encoded = $root.ChatMessage.encode(messageInstance).finish();

  // Same prefix the hex form produced: the length as 10 hex digits, i.e. 5 big-endian bytes.
  const header = Buffer.alloc(5);
  header.writeUIntBE(encoded.length, 0, 5);
  return Buffer.concat([header, encoded]);
}
38
 
39
/**
 * Decodes a response chunk made of length-prefixed `ResMessage` frames into text.
 *
 * The chunk is read as a sequence of frames, each a 5-byte big-endian length
 * followed by that many bytes. Every complete frame is decoded with
 * `ResMessage.decode` and its `msg` values are concatenated. Parsing stops at
 * the first incomplete frame. When no frame could be decoded, or decoding
 * throws, the chunk is handed to `gunzip` instead.
 *
 * @param {Buffer|Uint8Array} chunk - Raw bytes received from the stream.
 * @returns {Promise<string>} Concatenated message text, or the `gunzip` fallback result.
 */
async function chunkToUtf8String(chunk) {
  const HEADER_SIZE = 5;
  try {
    const bytes = Buffer.from(chunk);
    const parts = [];
    let offset = 0;

    while (offset + HEADER_SIZE <= bytes.length) {
      const bodyStart = offset + HEADER_SIZE;
      const bodyEnd = bodyStart + bytes.readUIntBE(offset, HEADER_SIZE);
      // Declared length runs past the data: incomplete frame, or not a plain frame at all.
      if (bodyEnd > bytes.length) break;

      const message = $root.ResMessage.decode(bytes.subarray(bodyStart, bodyEnd));
      parts.push(message.msg);
      offset = bodyEnd;
    }

    if (parts.length === 0) {
      return gunzip(chunk);
    }
    return parts.join('');
  } catch (err) {
    // Undecodable payload: fall back to treating the chunk as gzip data.
    return gunzip(chunk);
  }
}
70
 
71
/**
 * Best-effort gzip decoding of a chunk, skipping its first 5 bytes.
 *
 * Resolves with the decompressed UTF-8 text. Resolves with '' when
 * decompression fails, or when the text matches the module-level `regex`
 * (this path only tries to surface error payloads; text that contains the
 * full system/user transcript is ignored on purpose).
 *
 * @param {Buffer|Uint8Array} chunk - Raw chunk; bytes from index 5 onward are inflated.
 * @returns {Promise<string>} Decompressed text or ''.
 */
function gunzip(chunk) {
  return new Promise((resolve) => {
    const onInflated = (error, inflated) => {
      if (error) {
        resolve('');
        return;
      }
      const decoded = inflated.toString('utf-8');
      resolve(regex.test(decoded) ? '' : decoded);
    };
    zlib.gunzip(chunk.slice(5), onInflated);
  });
}
88
 
89
/**
 * Generates a random string by sampling characters from a dictionary.
 *
 * The dictionary is `customDict` when it is provided and non-empty; otherwise
 * it is chosen by `dictType`: 'alphabet' (a-z, A-Z), 'max' (digits, letters,
 * '_' and '-'), or digits only for any other value.
 *
 * `size` is coerced to a number and rounded down; negative, fractional-below-one,
 * NaN or infinite sizes yield an empty string instead of looping forever.
 *
 * Characters are drawn with `Math.random()`, which is not cryptographically secure.
 *
 * @param {object} [options]
 * @param {number} [options.size] - Number of characters to generate.
 * @param {string} [options.dictType] - 'alphabet', 'max', or anything else for digits.
 * @param {string} [options.customDict] - Explicit set of characters to draw from.
 * @returns {string} The generated string.
 */
function getRandomIDPro({ size, dictType, customDict } = {}) {
  let alphabet = customDict;
  if (!alphabet) {
    switch (dictType) {
      case 'alphabet':
        alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
        break;
      case 'max':
        alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-';
        break;
      default:
        alphabet = '0123456789';
    }
  }

  // A countdown on a negative or fractional size never reaches zero, so normalise first.
  const wanted = Math.floor(Number(size));
  const count = Number.isFinite(wanted) && wanted > 0 ? wanted : 0;

  let random = '';
  for (let i = 0; i < count; i += 1) {
    random += alphabet[(Math.random() * alphabet.length) | 0];
  }
  return random;
}
106
 
107
  module.exports = {
108
  stringToHex,
109
+ chunkToUtf8String,
110
+ getRandomIDPro,
111
+ };