Spaces:

JIMMYGGG
/

cursor

Running

App Files Files Community

JIMMYGGG committed on Nov 26, 2024

Commit

f74e90b

verified ·

1 Parent(s): 59cc3b4

Update src/utils.js

Browse files

Files changed (1) hide show

src/utils.js +94 -108

src/utils.js CHANGED Viewed

@@ -1,125 +1,111 @@
-// Helper function to convert string to hex bytes: wraps the UTF-8 bytes of str and the model name in a fixed hex template and returns the result as a Buffer
-function stringToHex (str, modelName) {
-  const bytes = Buffer.from(str, 'utf-8')
-  const byteLength = bytes.length
-  // Calculate lengths and fields similar to Python version
-  const FIXED_HEADER = 2
-  const SEPARATOR = 1
-  const FIXED_SUFFIX_LENGTH = 0xA3 + modelName.length
-  // Compute the text length field (analogous to base_length1 in the Python version)
-  let textLengthField1, textLengthFieldSize1
-  if (byteLength < 128) {
-    textLengthField1 = byteLength.toString(16).padStart(2, '0')
-    textLengthFieldSize1 = 1
-  } else {
-    const lowByte1 = (byteLength & 0x7F) | 0x80 // low 7 bits with the 0x80 bit set
-    const highByte1 = (byteLength >> 7) & 0xFF // remaining bits, shifted down by 7
-    textLengthField1 = lowByte1.toString(16).padStart(2, '0') + highByte1.toString(16).padStart(2, '0')
-    textLengthFieldSize1 = 2
-  }
-  // Compute the base length (analogous to base_length in the Python version)
-  const baseLength = byteLength + 0x2A
-  let textLengthField, textLengthFieldSize
-  if (baseLength < 128) {
-    textLengthField = baseLength.toString(16).padStart(2, '0')
-    textLengthFieldSize = 1
-  } else {
-    const lowByte = (baseLength & 0x7F) | 0x80 // low 7 bits with the 0x80 bit set
-    const highByte = (baseLength >> 7) & 0xFF // remaining bits, shifted down by 7
-    textLengthField = lowByte.toString(16).padStart(2, '0') + highByte.toString(16).padStart(2, '0')
-    textLengthFieldSize = 2
-  }
-  // Compute the total message length
-  const messageTotalLength = FIXED_HEADER + textLengthFieldSize + SEPARATOR +
-        textLengthFieldSize1 + byteLength + FIXED_SUFFIX_LENGTH
-  const messageLengthHex = messageTotalLength.toString(16).padStart(10, '0')
-  // Build the complete hex string
-  const hexString = (
-    messageLengthHex +
-        '12' +
-        textLengthField +
-        '0A' +
-        textLengthField1 +
-        bytes.toString('hex') +
-        '10016A2432343163636435662D393162612D343131382D393239612D3936626330313631626432612' +
-        '2002A132F643A2F6964656150726F2F656475626F73733A1E0A' +
-        // Convert the model name length to two hex digits and make sure it is upper-case
-        Buffer.from(modelName, 'utf-8').length.toString(16).padStart(2, '0').toUpperCase() +
-        Buffer.from(modelName, 'utf-8').toString('hex').toUpperCase() +
-        '22004A' +
-        '24' + '61383761396133342D323164642D343863372D623434662D616636633365636536663765' +
-        '680070007A2436393337376535612D386332642D343835342D623564392D653062623232336163303061' +
-        '800101B00100C00100E00100E80100'
-  ).toUpperCase()
-  return Buffer.from(hexString, 'hex')
 }
-// Wrapper function that converts a chunk to a UTF-8 string
-function chunkToUtf8String (chunk) {
-  // Only handle chunks that start with 0x00 0x00 0x00 0x00 and skip the rest, otherwise the output is garbled (note: the check below tests only the first two bytes)
-  if (!(chunk[0] === 0x00 && chunk[1] === 0x00)) {
-    return ''
-  }
-  console.log('chunk:', Buffer.from(chunk).toString('hex'))
-  console.log('chunk string:', Buffer.from(chunk).toString('utf-8'))
-  // Drop everything up to and including the first 0x0A in the chunk
-  chunk = chunk.slice(chunk.indexOf(0x0A) + 1)
-  let filteredChunk = []
-  let i = 0
-  while (i < chunk.length) {
-    // Additional filter: on a run of four consecutive 0x00 bytes, also drop all following bytes whose high nibble is 0 (0x00 to 0x0F)
-    if (chunk.slice(i, i + 4).every(byte => byte === 0x00)) {
-      i += 4 // skip these four 0x00 bytes
-      while (i < chunk.length && chunk[i] >= 0x00 && chunk[i] <= 0x0F) {
-        i++ // skip every byte whose high nibble is 0
-      }
-      continue
     }
-    if (chunk[i] === 0x0C) {
-      // On 0x0C, skip the 0x0C and all consecutive 0x0A bytes that follow
-      i++ // skip the 0x0C
-      while (i < chunk.length && chunk[i] === 0x0A) {
-        i++ // skip all consecutive 0x0A bytes
-      }
-    } else if (
-      i > 0 &&
-      chunk[i] === 0x0A &&
-      chunk[i - 1] >= 0x00 &&
-      chunk[i - 1] <= 0x09
-    ) {
-      // If the current byte is 0x0A and the previous byte is between 0x00 and 0x09, drop both the previous byte and the current one
-      filteredChunk.pop() // remove the previously added byte
-      i++ // skip the current 0x0A
-    } else {
-      filteredChunk.push(chunk[i])
-      i++
     }
   }
-  // Step 2: remove every 0x00 and 0x0C
-  filteredChunk = filteredChunk.filter((byte) => byte !== 0x00 && byte !== 0x0C)
-  // Remove bytes smaller than 0x0A
-  filteredChunk = filteredChunk.filter((byte) => byte >= 0x0A)
-  const hexString = Buffer.from(filteredChunk).toString('hex')
-  console.log('hexString:', hexString)
-  const utf8String = Buffer.from(filteredChunk).toString('utf-8')
-  console.log('utf8String:', utf8String)
-  return utf8String
 }
 module.exports = {
   stringToHex,
-  chunkToUtf8String
-}

+const { v4: uuidv4 } = require('uuid');
+const zlib = require('zlib');
+const $root = require('./message.js');
+const regex = /<\|BEGIN_SYSTEM\|>.*?<\|END_SYSTEM\|>.*?<\|BEGIN_USER\|>.*?<\|END_USER\|>/s; // Matches text holding a BEGIN_SYSTEM...END_SYSTEM section followed by a BEGIN_USER...END_USER section; the s flag lets . cross newlines. gunzip() resolves with an empty string when its decompressed text matches.
+async function stringToHex(messages, modelName) { // Encodes messages and modelName as a ChatMessage and returns a Buffer holding a 5-byte length header followed by the encoded payload (async, so callers receive a Promise of that Buffer).
+  const formattedMessages = messages.map((msg) => ({
+    ...msg,
+    role: msg.role === 'user' ? 1 : 2, // the user role maps to 1; every other role maps to 2
+    message_id: uuidv4(), // fresh id for each message
+  }));
+  const message = {
+    messages: formattedMessages,
+    instructions: {
+      instruction: 'Always respond in 中文',
+    },
+    projectPath: '/path/to/project',
+    model: {
+      name: modelName,
+      empty: '',
+    },
+    requestId: uuidv4(),
+    summary: '',
+    conversationId: uuidv4(),
+  };
+  const errMsg = $root.ChatMessage.verify(message); // a truthy result is treated as an error message
+  if (errMsg) throw Error(errMsg);
+  const messageInstance = $root.ChatMessage.create(message);
+  const buffer = $root.ChatMessage.encode(messageInstance).finish();
+  const hexString = (buffer.length.toString(16).padStart(10, '0') + buffer.toString('hex')).toUpperCase(); // payload length as 10 zero-padded hex digits (5 bytes), then the payload as hex
+  return Buffer.from(hexString, 'hex');
 }
+async function chunkToUtf8String(chunk) { // Reads chunk as consecutive frames (10 hex digits of length, then a ResMessage payload) and returns the decoded msg fields joined together; falls back to gunzip(chunk) when no frame is decoded or when decoding throws.
+  try {
+    let hex = Buffer.from(chunk).toString('hex');
+    let offset = 0; // position in hex, counted in hex characters (two per byte)
+    let results = [];
+    while (offset < hex.length) {
+      if (offset + 10 > hex.length) break; // not enough characters left for a 10-digit length header
+      const dataLength = parseInt(hex.slice(offset, offset + 10), 16); // payload length in bytes
+      offset += 10;
+      if (offset + dataLength * 2 > hex.length) break; // the payload is incomplete, stop parsing
+      const messageHex = hex.slice(offset, offset + dataLength * 2);
+      offset += dataLength * 2;
+      const messageBuffer = Buffer.from(messageHex, 'hex');
+      const message = $root.ResMessage.decode(messageBuffer);
+      results.push(message.msg);
     }
+    if (results.length == 0) { // nothing was decoded, so try the chunk as gzip data instead
+      return gunzip(chunk);
     }
+    return results.join('');
+  } catch (err) { // any parsing or decoding error also falls back to the gzip path
+    return gunzip(chunk);
   }
+}
+function gunzip(chunk) { // Gunzips chunk after skipping its first 5 bytes and returns a Promise of the decompressed UTF-8 text; resolves with an empty string on a gunzip error or when the text matches regex (reject is never called).
+  return new Promise((resolve, reject) => {
+    zlib.gunzip(chunk.slice(5), (err, decompressed) => {
+      if (err) {
+        resolve(''); // decompression errors are reported as an empty string rather than a rejection
+      } else {
+        const text = decompressed.toString('utf-8');
+        // This is only an attempt to parse error data; if the text contains the full response, ignore it
+        if (regex.test(text)) {
+          resolve('');
+        } else {
+          resolve(text);
+        }
+      }
+    });
+  });
+}
+function getRandomIDPro({ size, dictType, customDict }) { // Returns a string of size characters picked with Math.random() from customDict, or from the dictionary selected by dictType when customDict is falsy.
+  let random = '';
+  if (!customDict) {
+    switch (dictType) {
+      case 'alphabet':
+        customDict = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
+        break;
+      case 'max':
+        customDict = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-';
+        break;
+      default:
+        customDict = '0123456789'; // any other dictType falls back to decimal digits
+    }
+  }
+  for (; size--; ) random += customDict[(Math.random() * customDict.length) | 0]; // NOTE(review): the loop ends only when size reaches exactly 0, so a negative or fractional size never terminates - confirm callers pass a non-negative integer
+  return random;
 }
 module.exports = {
   stringToHex,
+  chunkToUtf8String,
+  getRandomIDPro,
+};