perf: handle token-encoding errors when reading files (fall back to character count)
This commit is contained in:
parent
93030afe3e
commit
13439c5183
@ -173,9 +173,16 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
const enc = getOpenAiEncMap();
|
||||
const encodeText = enc.encode(chunks.join(''));
|
||||
const tokens = encodeText.length;
|
||||
const tokens = (() => {
|
||||
try {
|
||||
const enc = getOpenAiEncMap();
|
||||
const encodeText = enc.encode(chunks.join(''));
|
||||
const tokens = encodeText.length;
|
||||
return tokens;
|
||||
} catch (error) {
|
||||
return chunks.join('').length;
|
||||
}
|
||||
})();
|
||||
|
||||
return {
|
||||
chunks,
|
||||
@ -274,5 +281,6 @@ export const simpleText = (text: string) => {
|
||||
text = text.replace(/([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])/g, '$1$2');
|
||||
text = text.replace(/\n{2,}/g, '\n');
|
||||
text = text.replace(/\s{2,}/g, ' ');
|
||||
text = text.replace(/[^\x00-\x7F]/g, ' ');
|
||||
return text;
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user