Archer 2bf1fce32a
perf: file encoding;perf: leave team code;@c121914yu perf: full text search code (#3528)
* perf: text encoding

* perf: leave team code

* perf: full text search code

* fix: http status

* perf: embedding search and vector avatar
2025-01-06 12:43:33 +08:00

78 lines
2.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { detect } from 'jschardet';
import { documentFileType, imageFileType } from './constants';
import { ChatFileTypeEnum } from '../../core/chat/constants';
import { UserChatItemValueItemType } from '../../core/chat/type';
import * as fs from 'fs';
export const formatFileSize = (bytes: number): string => {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
};
export const detectFileEncoding = (buffer: Buffer) => {
return detect(buffer.slice(0, 200))?.encoding?.toLocaleLowerCase();
};
export const detectFileEncodingByPath = async (path: string) => {
// Get 64KB file head
const MAX_BYTES = 64 * 1024;
const buffer = Buffer.alloc(MAX_BYTES);
const fd = await fs.promises.open(path, 'r');
try {
// Read file head
const { bytesRead } = await fd.read(buffer, 0, MAX_BYTES, 0);
const actualBuffer = buffer.slice(0, bytesRead);
return detect(actualBuffer)?.encoding?.toLocaleLowerCase();
} finally {
await fd.close();
}
};
// Url => user upload file type
export const parseUrlToFileType = (url: string): UserChatItemValueItemType['file'] | undefined => {
if (typeof url !== 'string') return;
const parseUrl = new URL(url, 'https://locaohost:3000');
const filename = (() => {
// Check base64 image
if (url.startsWith('data:image/')) {
const mime = url.split(',')[0].split(':')[1].split(';')[0];
return `image.${mime.split('/')[1]}`;
}
// Old version file url: https://xxx.com/file/read?filename=xxx.pdf
const filenameQuery = parseUrl.searchParams.get('filename');
if (filenameQuery) return filenameQuery;
// Common file https://xxx.com/xxx.pdf?xxxx=xxx
const pathname = parseUrl.pathname;
if (pathname) return pathname.split('/').pop();
})();
if (!filename) return;
const extension = filename.split('.').pop()?.toLowerCase() || '';
if (!extension) return;
if (documentFileType.includes(extension)) {
return {
type: ChatFileTypeEnum.file,
name: filename,
url
};
}
if (imageFileType.includes(extension)) {
return {
type: ChatFileTypeEnum.image,
name: filename,
url
};
}
};