Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
Archer 2024-04-01 19:01:26 +08:00 committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions

View File

@ -22,7 +22,7 @@ weight: 356
## How tools run
To understand how a tool runs, you first need to know the conditions under which it runs.
1. The tool needs an introduction, or description. It tells the LLM what the tool does, and the LLM decides from the context whether the tool should be called.
2. The tool's parameters. Some tools may need special parameters when they are called. Each parameter has two key values: `parameter description` and `required`.

View File

@ -3,12 +3,17 @@ import { ErrType } from '../errorCode';
/* dataset: 507000 */ /* dataset: 507000 */
const startCode = 507000; const startCode = 507000;
export enum CommonErrEnum { export enum CommonErrEnum {
fileNotFound = 'fileNotFound' fileNotFound = 'fileNotFound',
unAuthFile = 'unAuthFile'
} }
const datasetErr = [ const datasetErr = [
{ {
statusText: CommonErrEnum.fileNotFound, statusText: CommonErrEnum.fileNotFound,
message: 'error.fileNotFound' message: 'error.fileNotFound'
},
{
statusText: CommonErrEnum.unAuthFile,
message: 'error.unAuthFile'
} }
]; ];
export default datasetErr.reduce((acc, cur, index) => { export default datasetErr.reduce((acc, cur, index) => {

View File

@ -40,9 +40,9 @@ export const splitText2Chunks = (props: {
{ reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 }, { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block { reg: /([\n]([`~]))/g, maxLen: chunkLen * 4 }, // code block
{ reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // (?![\*\-|>`0-9]): markdown special char { reg: /([\n](?!\s*[\*\-|>0-9]))/g, maxLen: chunkLen * 2 }, // Enlarge the block so it is as likely as possible to be a complete paragraph. (?![\*\-|>`0-9]): markdown special char
{ reg: /([\n])/g, maxLen: chunkLen * 1.2 }, { reg: /([\n])/g, maxLen: chunkLen * 1.2 },
// ------ There's no overlap on the top
{ reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 }, { reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.2 },
{ reg: /([]|!\s)/g, maxLen: chunkLen * 1.2 }, { reg: /([]|!\s)/g, maxLen: chunkLen * 1.2 },
{ reg: /([]|\?\s)/g, maxLen: chunkLen * 1.4 }, { reg: /([]|\?\s)/g, maxLen: chunkLen * 1.4 },
@ -56,7 +56,7 @@ export const splitText2Chunks = (props: {
const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen; const checkIndependentChunk = (step: number) => step >= customRegLen && step <= 4 + customRegLen;
const checkForbidOverlap = (step: number) => step <= 6 + customRegLen; const checkForbidOverlap = (step: number) => step <= 6 + customRegLen;
// if use markdown title split, Separate record title title // if use markdown title split, Separate record title
const getSplitTexts = ({ text, step }: { text: string; step: number }) => { const getSplitTexts = ({ text, step }: { text: string; step: number }) => {
if (step >= stepReges.length) { if (step >= stepReges.length) {
return [ return [
@ -97,6 +97,7 @@ export const splitText2Chunks = (props: {
.filter((item) => item.text.trim()); .filter((item) => item.text.trim());
}; };
/* Gets the overlap at the end of a text as the beginning of the next block */
const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => { const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => {
const forbidOverlap = checkForbidOverlap(step); const forbidOverlap = checkForbidOverlap(step);
const maxOverlapLen = chunkLen * 0.4; const maxOverlapLen = chunkLen * 0.4;

View File

@ -55,6 +55,7 @@ export type FastGPTFeConfigsType = {
customApiDomain?: string; customApiDomain?: string;
customSharePageDomain?: string; customSharePageDomain?: string;
uploadFileMaxAmount?: number;
uploadFileMaxSize?: number; uploadFileMaxSize?: number;
}; };

View File

@ -44,14 +44,18 @@ export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams
export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & { export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
link: string; link: string;
}; };
export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
fileId: string;
};
export type FileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & { export type FileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
name: string;
rawTextLength: number;
hashRawText: string;
fileMetadata?: Record<string, any>; fileMetadata?: Record<string, any>;
collectionMetadata?: Record<string, any>; collectionMetadata?: Record<string, any>;
}; };
export type CsvTableCreateDatasetCollectionParams = {
datasetId: string;
parentId?: string;
fileId: string;
};
/* ================= data ===================== */ /* ================= data ===================== */
export type PgSearchRawType = { export type PgSearchRawType = {

View File

@ -73,6 +73,13 @@ export const DatasetCollectionSyncResultMap = {
/* ------------ data -------------- */ /* ------------ data -------------- */
/* ------------ training -------------- */ /* ------------ training -------------- */
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable'
}
export enum TrainingModeEnum { export enum TrainingModeEnum {
chunk = 'chunk', chunk = 'chunk',
auto = 'auto', auto = 'auto',

View File

@ -2,18 +2,18 @@
"name": "@fastgpt/global", "name": "@fastgpt/global",
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"@apidevtools/swagger-parser": "^10.1.0",
"axios": "^1.5.1", "axios": "^1.5.1",
"dayjs": "^1.11.7", "dayjs": "^1.11.7",
"encoding": "^0.1.13", "encoding": "^0.1.13",
"js-tiktoken": "^1.0.7", "js-tiktoken": "^1.0.7",
"openapi-types": "^12.1.3",
"openai": "4.28.0",
"nanoid": "^4.0.1",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
"timezones-list": "^3.0.2",
"next": "13.5.2",
"jschardet": "3.1.1", "jschardet": "3.1.1",
"@apidevtools/swagger-parser": "^10.1.0" "nanoid": "^4.0.1",
"next": "13.5.2",
"openai": "4.28.0",
"openapi-types": "^12.1.3",
"timezones-list": "^3.0.2"
}, },
"devDependencies": { "devDependencies": {
"@types/js-yaml": "^4.0.9", "@types/js-yaml": "^4.0.9",

View File

@ -0,0 +1,33 @@
import { connectionMongo, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
import { RawTextBufferSchemaType } from './type';
export const collectionName = 'buffer.rawText';
const RawTextBufferSchema = new Schema({
sourceId: {
type: String,
required: true
},
rawText: {
type: String,
default: ''
},
createTime: {
type: Date,
default: () => new Date()
},
metadata: Object
});
try {
RawTextBufferSchema.index({ sourceId: 1 });
// 20 minutes
RawTextBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: 20 * 60 });
} catch (error) {
console.log(error);
}
export const MongoRwaTextBuffer: Model<RawTextBufferSchemaType> =
models[collectionName] || model(collectionName, RawTextBufferSchema);
MongoRwaTextBuffer.syncIndexes();
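The schema above caches extracted raw text per source file, and the TTL index drops an entry roughly 20 minutes after its createTime. A minimal read-through sketch (the relative import path is an assumption):

import { MongoRwaTextBuffer } from './schema'; // path is an assumption

// Return the cached raw text for a GridFS file id, or null once the TTL index has expired it.
export async function getCachedRawText(sourceId: string) {
  const hit = await MongoRwaTextBuffer.findOne({ sourceId }).lean();
  return hit?.rawText ?? null;
}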

View File

@ -0,0 +1,8 @@
export type RawTextBufferSchemaType = {
sourceId: string;
rawText: string;
createTime: Date;
metadata?: {
filename: string;
};
};

View File

@ -2,7 +2,7 @@ import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo; const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d'; import { TTSBufferSchemaType } from './type.d';
export const collectionName = 'ttsbuffers'; export const collectionName = 'buffer.tts';
const TTSBufferSchema = new Schema({ const TTSBufferSchema = new Schema({
bufferId: { bufferId: {

View File

@ -4,6 +4,18 @@ import fsp from 'fs/promises';
import fs from 'fs'; import fs from 'fs';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type'; import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema'; import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readFileRawText } from '../read/rawText';
import { ReadFileByBufferParams } from '../read/type';
import { readMarkdown } from '../read/markdown';
import { readHtmlRawText } from '../read/html';
import { readPdfFile } from '../read/pdf';
import { readWordFile } from '../read/word';
import { readCsvRawText } from '../read/csv';
import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
import { readPptxRawText } from '../read/pptx';
import { readXlsxRawText } from '../read/xlsx';
export function getGFSCollection(bucket: `${BucketNameEnum}`) { export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoFileSchema; MongoFileSchema;
@ -111,3 +123,139 @@ export async function getDownloadStream({
return bucket.openDownloadStream(new Types.ObjectId(fileId)); return bucket.openDownloadStream(new Types.ObjectId(fileId));
} }
export const readFileEncode = async ({
bucketName,
fileId
}: {
bucketName: `${BucketNameEnum}`;
fileId: string;
}) => {
const encodeStream = await getDownloadStream({ bucketName, fileId });
let buffers: Buffer = Buffer.from([]);
for await (const chunk of encodeStream) {
buffers = Buffer.concat([buffers, chunk]);
if (buffers.length > 10) {
encodeStream.abort();
break;
}
}
const encoding = detectFileEncoding(buffers);
return encoding as BufferEncoding;
};
export const readFileContent = async ({
teamId,
bucketName,
fileId,
csvFormat = false
}: {
teamId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
csvFormat?: boolean;
}): Promise<{
rawText: string;
filename: string;
}> => {
// read buffer
const fileBuffer = await MongoRwaTextBuffer.findOne({ sourceId: fileId }).lean();
if (fileBuffer) {
return {
rawText: fileBuffer.rawText,
filename: fileBuffer.metadata?.filename || ''
};
}
const [file, encoding, fileStream] = await Promise.all([
getFileById({ bucketName, fileId }),
readFileEncode({ bucketName, fileId }),
getDownloadStream({ bucketName, fileId })
]);
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';
const fileBuffers = await (() => {
return new Promise<Buffer>((resolve, reject) => {
let buffers = Buffer.from([]);
fileStream.on('data', (chunk) => {
buffers = Buffer.concat([buffers, chunk]);
});
fileStream.on('end', () => {
resolve(buffers);
});
fileStream.on('error', (err) => {
reject(err);
});
});
})();
const params: ReadFileByBufferParams = {
teamId,
buffer: fileBuffers,
encoding,
metadata: {
relatedId: fileId
}
};
const { rawText } = await (async () => {
switch (extension) {
case 'txt':
return readFileRawText(params);
case 'md':
return readMarkdown(params);
case 'html':
return readHtmlRawText(params);
case 'pdf':
return readPdfFile(params);
case 'docx':
return readWordFile(params);
case 'pptx':
return readPptxRawText(params);
case 'xlsx':
const xlsxResult = await readXlsxRawText(params);
if (csvFormat) {
return {
rawText: xlsxResult.formatText || ''
};
}
return {
rawText: xlsxResult.rawText
};
case 'csv':
const csvResult = await readCsvRawText(params);
if (csvFormat) {
return {
rawText: csvResult.formatText || ''
};
}
return {
rawText: csvResult.rawText
};
default:
return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, .pptx, .csv, .xlsx');
}
})();
if (rawText.trim()) {
await MongoRwaTextBuffer.create({
sourceId: fileId,
rawText,
metadata: {
filename: file.filename
}
});
}
return {
rawText,
filename: file.filename
};
};
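readFileContent first consults the raw-text buffer above, detects the encoding from the first bytes, streams the GridFS file into a Buffer, dispatches on the file extension, and finally caches the result. A hedged call sketch (the import path and the wrapper function are assumptions):

import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller'; // path is an assumption

// csvFormat: true makes csv/xlsx return the "header:value" formatted text instead of raw cells.
export async function loadDatasetFileText(teamId: string, fileId: string) {
  const { rawText, filename } = await readFileContent({
    teamId,
    bucketName: 'dataset',
    fileId,
    csvFormat: true
  });
  return { rawText, filename };
}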

View File

@ -14,7 +14,6 @@ export async function uploadMongoImg({
teamId, teamId,
expiredTime, expiredTime,
metadata, metadata,
shareId shareId
}: UploadImgProps & { }: UploadImgProps & {
teamId: string; teamId: string;
@ -30,9 +29,8 @@ export async function uploadMongoImg({
type, type,
teamId, teamId,
binary, binary,
expiredTime: expiredTime, expiredTime,
metadata, metadata,
shareId shareId
}); });

View File

@ -25,13 +25,13 @@ const ImageSchema = new Schema({
enum: Object.keys(mongoImageTypeMap), enum: Object.keys(mongoImageTypeMap),
required: true required: true
}, },
metadata: { metadata: {
type: Object type: Object
} }
}); });
try { try {
// tts expired
ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 }); ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
ImageSchema.index({ type: 1 }); ImageSchema.index({ type: 1 });
ImageSchema.index({ createTime: 1 }); ImageSchema.index({ createTime: 1 });

View File

@ -0,0 +1,21 @@
import Papa from 'papaparse';
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { readFileRawText } from './rawText';
// Load the source file content
export const readCsvRawText = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const { rawText } = readFileRawText(params);
const csvArr = Papa.parse(rawText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
: '';
return {
rawText,
formatText
};
};
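A worked example of the two outputs (illustrative values; note that, as written, the header row is prefixed as well):

// rawText:    "name,age\nTom,3"
// Papa.parse: [["name","age"], ["Tom","3"]]
// formatText: "name:name\nage:age\nname:Tom\nage:3"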

View File

@ -0,0 +1,23 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { htmlToMarkdown } from '../../string/markdown';
import { readFileRawText } from './rawText';
export const readHtmlRawText = async (
params: ReadFileByBufferParams
): Promise<ReadFileResponse> => {
const { teamId, metadata } = params;
const { rawText: html } = readFileRawText(params);
const md = await htmlToMarkdown(html);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText
};
};

View File

@ -0,0 +1,18 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import { initMarkdownText } from './utils';
import { readFileRawText } from './rawText';
export const readMarkdown = async (params: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const { teamId, metadata } = params;
const { rawText: md } = readFileRawText(params);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText
};
};

View File

@ -0,0 +1,119 @@
import { getNanoid } from '@fastgpt/global/common/string/tools';
import fs from 'fs';
import decompress from 'decompress';
import { DOMParser } from '@xmldom/xmldom';
import { clearDirFiles } from '../utils';
import { addLog } from '../../system/log';
const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';
function getNewFileName(ext: string) {
return `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}.${ext}`;
}
const parseString = (xml: string) => {
let parser = new DOMParser();
return parser.parseFromString(xml, 'text/xml');
};
const parsePowerPoint = async ({
filepath,
decompressPath,
encoding
}: {
filepath: string;
decompressPath: string;
encoding: BufferEncoding;
}) => {
// Files regex that hold our content of interest
const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+.xml/g;
const slidesRegex = /ppt\/slides\/slide\d+.xml/g;
/** The decompress location which contains the filename in it */
const files = await decompress(filepath, decompressPath, {
filter: (x) => !!x.path.match(allFilesRegex)
});
// Verify that at least the slide xml files exist in the extracted files list.
if (
files.length == 0 ||
!files.map((file) => file.path).some((filename) => filename.match(slidesRegex))
) {
return Promise.reject('Failed to parse the PPT file');
}
// Returning an array of all the xml contents read using fs.readFileSync
const xmlContentArray = files.map((file) =>
fs.readFileSync(`${decompressPath}/${file.path}`, encoding)
);
let responseArr: string[] = [];
xmlContentArray.forEach((xmlContent) => {
/** Find text nodes with a:p tags */
const xmlParagraphNodesList = parseString(xmlContent).getElementsByTagName('a:p');
/** Store all the text content to respond */
responseArr.push(
Array.from(xmlParagraphNodesList)
// Filter out paragraph nodes that do not have any text nodes, which are identifiable by the a:t tag
.filter((paragraphNode) => paragraphNode.getElementsByTagName('a:t').length != 0)
.map((paragraphNode) => {
/** Find text nodes with a:t tags */
const xmlTextNodeList = paragraphNode.getElementsByTagName('a:t');
return Array.from(xmlTextNodeList)
.filter((textNode) => textNode.childNodes[0] && textNode.childNodes[0].nodeValue)
.map((textNode) => textNode.childNodes[0].nodeValue)
.join('');
})
.join('\n')
);
});
return responseArr.join('\n');
};
export const parseOffice = async ({
buffer,
encoding,
extension
}: {
buffer: Buffer;
encoding: BufferEncoding;
extension: string;
}) => {
// Prepare file for processing
// create temp file subdirectory if it does not exist
if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
}
// temp file name
const filepath = getNewFileName(extension);
const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;
// const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/test`;
// write new file
fs.writeFileSync(filepath, buffer, {
encoding
});
const text = await (async () => {
try {
switch (extension) {
case 'pptx':
return parsePowerPoint({ filepath, decompressPath, encoding });
default:
return Promise.reject('Only .pptx files can be read');
}
} catch (error) {
addLog.error(`Load ppt error`, { error });
}
return '';
})();
fs.unlinkSync(filepath);
clearDirFiles(decompressPath);
return text;
};
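parsePowerPoint unzips the .pptx, keeps only the slide and notes-slide XML parts, and concatenates their text runs; in OOXML DrawingML, paragraphs are a:p elements and text runs are a:t elements. A compact sketch of the per-slide extraction that mirrors the logic above:

import { DOMParser } from '@xmldom/xmldom';

// Extract the visible text of one slide XML document (e.g. ppt/slides/slide1.xml).
const slideXmlToText = (xml: string) => {
  const doc = new DOMParser().parseFromString(xml, 'text/xml');
  return Array.from(doc.getElementsByTagName('a:p'))
    .map((p) =>
      Array.from(p.getElementsByTagName('a:t'))
        .map((t) => t.childNodes[0]?.nodeValue || '')
        .join('')
    )
    .filter(Boolean)
    .join('\n');
};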

View File

@ -1,5 +1,7 @@
/* read file to txt */ import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
import * as pdfjsLib from 'pdfjs-dist'; // @ts-ignore
import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
import { ReadFileByBufferParams, ReadFileResponse } from './type';
type TokenType = { type TokenType = {
str: string; str: string;
@ -11,9 +13,9 @@ type TokenType = {
hasEOL: boolean; hasEOL: boolean;
}; };
export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => { export const readPdfFile = async ({
pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js'; buffer
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const readPDFPage = async (doc: any, pageNo: number) => { const readPDFPage = async (doc: any, pageNo: number) => {
const page = await doc.getPage(pageNo); const page = await doc.getPage(pageNo);
const tokenizedText = await page.getTextContent(); const tokenizedText = await page.getTextContent();
@ -51,14 +53,19 @@ export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
.join(''); .join('');
}; };
const doc = await pdfjsLib.getDocument(pdf).promise; const loadingTask = pdfjs.getDocument(buffer.buffer);
const doc = await loadingTask.promise;
const pageTextPromises = []; const pageTextPromises = [];
for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) { for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
pageTextPromises.push(readPDFPage(doc, pageNo)); pageTextPromises.push(readPDFPage(doc, pageNo));
} }
const pageTexts = await Promise.all(pageTextPromises); const pageTexts = await Promise.all(pageTextPromises);
loadingTask.destroy();
return { return {
rawText: pageTexts.join('') rawText: pageTexts.join(''),
metadata: {}
}; };
}; };

View File

@ -0,0 +1,14 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
// import { parseOfficeAsync } from 'officeparser';
import { parseOffice } from './parseOffice';
export const readPptxRawText = async ({
buffer,
encoding
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const result = await parseOffice({ buffer, encoding, extension: 'pptx' });
return {
rawText: result
};
};

View File

@ -0,0 +1,10 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
// Load the source file content
export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
const content = buffer.toString(encoding);
return {
rawText: content
};
};

View File

@ -0,0 +1,12 @@
export type ReadFileByBufferParams = {
teamId: string;
buffer: Buffer;
encoding: BufferEncoding;
metadata?: Record<string, any>;
};
export type ReadFileResponse = {
rawText: string;
formatText?: string;
metadata?: Record<string, any>;
};
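Every reader in this PR implements the same (ReadFileByBufferParams) => ReadFileResponse contract, so supporting another format means adding one reader plus one case in readFileContent. A sketch of a hypothetical JSON reader following the contract (this file is not part of the PR):

import { ReadFileByBufferParams, ReadFileResponse } from './type.d';

// Hypothetical example reader: pretty-print the JSON document as the raw text.
export const readJsonRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
  const rawText = JSON.stringify(JSON.parse(buffer.toString(encoding)), null, 2);
  return { rawText };
};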

View File

@ -0,0 +1,25 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';
export const initMarkdownText = ({
teamId,
md,
metadata
}: {
md: string;
teamId: string;
metadata?: Record<string, any>;
}) =>
markdownProcess({
rawText: md,
uploadImgController: (base64Img) =>
uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img,
teamId,
metadata,
expiredTime: addHours(new Date(), 2)
})
});

View File

@ -0,0 +1,35 @@
import mammoth from 'mammoth';
import { htmlToMarkdown } from '../../string/markdown';
import { ReadFileByBufferParams, ReadFileResponse } from './type';
import { initMarkdownText } from './utils';
/**
* read docx to markdown
*/
export const readWordFile = async ({
teamId,
buffer,
metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
try {
const { value: html } = await mammoth.convertToHtml({
buffer
});
const md = await htmlToMarkdown(html);
const rawText = await initMarkdownText({
teamId,
md,
metadata
});
return {
rawText,
metadata: {}
};
} catch (error) {
console.log('error doc read:', error);
return Promise.reject('Can not read doc file, please convert to PDF');
}
};

View File

@ -0,0 +1,45 @@
import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
import xlsx from 'node-xlsx';
import Papa from 'papaparse';
export const readXlsxRawText = async ({
buffer
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
const result = xlsx.parse(buffer, {
skipHidden: false,
defval: ''
});
const format2Csv = result.map(({ name, data }) => {
return {
title: `#${name}`,
csvText: data.map((item) => item.join(',')).join('\n')
};
});
const rawText = format2Csv.map((item) => item.csvText).join('\n');
const formatText = format2Csv
.map((item) => {
const csvArr = Papa.parse(item.csvText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr
.map((item) =>
item
.map((item, i) => (item ? `${header[i]}:${item}` : ''))
.filter(Boolean)
.join('\n')
)
.join('\n')
: '';
return `${item.title}\n${formatText}`;
})
.join('\n');
return {
rawText: rawText,
formatText
};
};
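A worked example for a workbook with a single sheet (illustrative values; each sheet contributes its name as a #title line to formatText, and the header row is prefixed as well):

// Sheet1 rows:  [["name","age"], ["Tom","3"]]
// rawText:      "name,age\nTom,3"
// formatText:   "#Sheet1\nname:name\nage:age\nname:Tom\nage:3"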

View File

@ -35,13 +35,8 @@ export const clearDirFiles = (dirPath: string) => {
return; return;
} }
fs.readdirSync(dirPath).forEach((file) => { fs.rmdirSync(dirPath, {
const curPath = `${dirPath}/${file}`; recursive: true
if (fs.lstatSync(curPath).isDirectory()) {
clearDirFiles(curPath);
} else {
fs.unlinkSync(curPath);
}
}); });
}; };

View File

@ -9,7 +9,6 @@ import {
DatasetCollectionSchemaType DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type'; } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema'; import { MongoDatasetTraining } from '../training/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetData } from '../data/schema'; import { MongoDatasetData } from '../data/schema';
import { delImgByRelatedId } from '../../../common/file/image/controller'; import { delImgByRelatedId } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller'; import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';

View File

@ -0,0 +1,6 @@
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
}

View File

@ -1,14 +1,16 @@
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetTraining } from './schema'; import { MongoDatasetTraining } from './schema';
import type { import type {
PushDatasetDataChunkProps, PushDatasetDataChunkProps,
PushDatasetDataProps, PushDatasetDataProps,
PushDatasetDataResponse PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d'; } from '@fastgpt/global/core/dataset/api.d';
import { getCollectionWithDataset } from '../controller';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { simpleText } from '@fastgpt/global/common/string/tools'; import { simpleText } from '@fastgpt/global/common/string/tools';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken'; import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { ClientSession } from '../../../common/mongo';
import { getLLMModel, getVectorModel } from '../../ai/model';
import { addLog } from '../../../common/system/log';
import { getCollectionWithDataset } from '../controller';
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => { export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
try { try {
@ -23,31 +25,52 @@ export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> =>
} catch (error) {} } catch (error) {}
}; };
export async function pushDataListToTrainingQueue({ export const pushDataListToTrainingQueueByCollectionId = async ({
teamId,
tmbId,
collectionId, collectionId,
data, ...props
prompt,
billId,
trainingMode = TrainingModeEnum.chunk
}: { }: {
teamId: string; teamId: string;
tmbId: string; tmbId: string;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> { session?: ClientSession;
const vectorModelList = global.vectorModels; } & PushDatasetDataProps) => {
const datasetModelList = global.llmModels;
const { const {
datasetId: { _id: datasetId, vectorModel, agentModel } datasetId: { _id: datasetId, agentModel, vectorModel }
} = await getCollectionWithDataset(collectionId); } = await getCollectionWithDataset(collectionId);
return pushDataListToTrainingQueue({
...props,
datasetId,
collectionId,
agentModel,
vectorModel
});
};
export async function pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId,
collectionId,
agentModel,
vectorModel,
data,
prompt,
billId,
trainingMode = TrainingModeEnum.chunk,
session
}: {
teamId: string;
tmbId: string;
datasetId: string;
agentModel: string;
vectorModel: string;
session?: ClientSession;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
const checkModelValid = async () => { const checkModelValid = async () => {
const agentModelData = datasetModelList?.find((item) => item.model === agentModel); const agentModelData = getLLMModel(agentModel);
if (!agentModelData) { if (!agentModelData) {
return Promise.reject(`File model ${agentModel} is inValid`); return Promise.reject(`File model ${agentModel} is inValid`);
} }
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel); const vectorModelData = getVectorModel(vectorModel);
if (!vectorModelData) { if (!vectorModelData) {
return Promise.reject(`Vector model ${vectorModel} is inValid`); return Promise.reject(`Vector model ${vectorModel} is inValid`);
} }
@ -124,52 +147,43 @@ export async function pushDataListToTrainingQueue({
}); });
// insert data to db // insert data to db
const insertData = async (dataList: PushDatasetDataChunkProps[], retry = 3): Promise<number> => { const insertLen = filterResult.success.length;
try { const failedDocuments: PushDatasetDataChunkProps[] = [];
const results = await MongoDatasetTraining.insertMany(
dataList.map((item, i) => ({
teamId,
tmbId,
datasetId,
collectionId,
billId,
mode: trainingMode,
prompt,
model,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex ?? 0,
weight: weight ?? 0,
indexes: item.indexes
}))
);
await delay(500);
return results.length;
} catch (error) {
if (retry > 0) {
await delay(500);
return insertData(dataList, retry - 1);
}
return Promise.reject(error);
}
};
let insertLen = 0; // Use insertMany to batch-insert
const chunkSize = 50; try {
const chunkList = filterResult.success.reduce( await MongoDatasetTraining.insertMany(
(acc, cur) => { filterResult.success.map((item) => ({
const lastChunk = acc[acc.length - 1]; teamId,
if (lastChunk.length < chunkSize) { tmbId,
lastChunk.push(cur); datasetId,
} else { collectionId,
acc.push([cur]); billId,
mode: trainingMode,
prompt,
model,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex ?? 0,
weight: weight ?? 0,
indexes: item.indexes
})),
{
session
} }
return acc; );
}, } catch (error: any) {
[[]] as PushDatasetDataChunkProps[][] addLog.error(`Insert error`, error);
); // On error, add the failed documents to the failure list
for await (const chunks of chunkList) { error.writeErrors.forEach((writeError: any) => {
insertLen += await insertData(chunks); failedDocuments.push(data[writeError.index]);
});
console.log('failed', failedDocuments);
}
// For failed documents, retry inserting them one by one
for await (const item of failedDocuments) {
await MongoDatasetTraining.create(item);
} }
delete filterResult.success; delete filterResult.success;
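The refactor separates the collection lookup (pushDataListToTrainingQueueByCollectionId resolves datasetId and the agent/vector models, then delegates) from the queue write, and replaces the hand-rolled 50-item batching with a single insertMany inside an optional session, retrying failed documents one by one. A hedged call sketch (import path and chunk shape are assumptions):

import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller'; // path is an assumption
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';

export async function queueChunksForCollection(params: {
  teamId: string;
  tmbId: string;
  collectionId: string;
  chunks: { q: string; a?: string }[];
}) {
  const { teamId, tmbId, collectionId, chunks } = params;
  return pushDataListToTrainingQueueByCollectionId({
    teamId,
    tmbId,
    collectionId,
    trainingMode: TrainingModeEnum.chunk,
    data: chunks.map((item, chunkIndex) => ({ ...item, chunkIndex }))
  });
}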

View File

@ -2,6 +2,7 @@ import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { addLog } from '../../../common/system/log'; import { addLog } from '../../../common/system/log';
import { getErrText } from '@fastgpt/global/common/error/utils'; import { getErrText } from '@fastgpt/global/common/error/utils';
import { MongoDatasetTraining } from './schema'; import { MongoDatasetTraining } from './schema';
import Papa from 'papaparse';
export const checkInvalidChunkAndLock = async ({ export const checkInvalidChunkAndLock = async ({
err, err,
@ -39,3 +40,18 @@ export const checkInvalidChunkAndLock = async ({
} }
return false; return false;
}; };
export const parseCsvTable2Chunks = (rawText: string) => {
const csvArr = Papa.parse(rawText).data as string[][];
const chunks = csvArr
.map((item) => ({
q: item[0] || '',
a: item[1] || ''
}))
.filter((item) => item.q || item.a);
return {
chunks
};
};
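A worked example of the Q&A table helper (illustrative rows; note it does not skip a header row by itself):

// rawText: "question,answer\nWhat is FastGPT?,An LLM application platform"
// chunks:  [
//   { q: 'question',         a: 'answer' },
//   { q: 'What is FastGPT?', a: 'An LLM application platform' }
// ]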

View File

@ -4,27 +4,36 @@
"dependencies": { "dependencies": {
"@fastgpt/global": "workspace:*", "@fastgpt/global": "workspace:*",
"@node-rs/jieba": "1.10.0", "@node-rs/jieba": "1.10.0",
"@xmldom/xmldom": "^0.8.10",
"axios": "^1.5.1", "axios": "^1.5.1",
"cheerio": "1.0.0-rc.12", "cheerio": "1.0.0-rc.12",
"cookie": "^0.5.0", "cookie": "^0.5.0",
"date-fns": "2.30.0", "date-fns": "2.30.0",
"dayjs": "^1.11.7", "dayjs": "^1.11.7",
"decompress": "^4.2.1",
"encoding": "^0.1.13", "encoding": "^0.1.13",
"file-type": "^19.0.0",
"json5": "^2.2.3", "json5": "^2.2.3",
"jsonwebtoken": "^9.0.2", "jsonwebtoken": "^9.0.2",
"mammoth": "^1.6.0",
"mongoose": "^7.0.2", "mongoose": "^7.0.2",
"multer": "1.4.5-lts.1", "multer": "1.4.5-lts.1",
"next": "13.5.2", "next": "13.5.2",
"nextjs-cors": "^2.1.2", "nextjs-cors": "^2.1.2",
"node-cron": "^3.0.3", "node-cron": "^3.0.3",
"node-xlsx": "^0.23.0",
"papaparse": "5.4.1",
"pdfjs-dist": "4.0.269",
"pg": "^8.10.0", "pg": "^8.10.0",
"tunnel": "^0.0.6" "tunnel": "^0.0.6"
}, },
"devDependencies": { "devDependencies": {
"@types/cookie": "^0.5.2", "@types/cookie": "^0.5.2",
"@types/decompress": "^4.2.7",
"@types/jsonwebtoken": "^9.0.3", "@types/jsonwebtoken": "^9.0.3",
"@types/multer": "^1.4.10", "@types/multer": "^1.4.10",
"@types/node-cron": "^3.0.11", "@types/node-cron": "^3.0.11",
"@types/papaparse": "5.3.7",
"@types/pg": "^8.6.6", "@types/pg": "^8.6.6",
"@types/tunnel": "^0.0.4" "@types/tunnel": "^0.0.4"
} }

View File

@ -0,0 +1,42 @@
import { AuthResponseType } from '@fastgpt/global/support/permission/type';
import { AuthModeType } from '../type';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { parseHeaderCert } from '../controller';
import { getFileById } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
export async function authFile({
fileId,
per = 'owner',
...props
}: AuthModeType & {
fileId: string;
}): Promise<
AuthResponseType & {
file: DatasetFileSchema;
}
> {
const authRes = await parseHeaderCert(props);
const { teamId, tmbId } = authRes;
const file = await getFileById({ bucketName: BucketNameEnum.dataset, fileId });
if (!file) {
return Promise.reject(CommonErrEnum.fileNotFound);
}
if (file.metadata?.teamId !== teamId) {
return Promise.reject(CommonErrEnum.unAuthFile);
}
if (per === 'owner' && file.metadata?.tmbId !== tmbId) {
return Promise.reject(CommonErrEnum.unAuthFile);
}
return {
...authRes,
isOwner: per === 'owner',
canWrite: per === 'owner',
file
};
}
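authFile backs the new unAuthFile error code: the file must belong to the caller's team, and per: 'owner' additionally requires the member who uploaded it. A hedged sketch of its use in an API handler (the import path and the AuthModeType fields passed here are assumptions):

import { authFile } from '@fastgpt/service/support/permission/auth/file'; // path is an assumption

export async function assertFileReadable(req: any, fileId: string) {
  // Rejects with fileNotFound / unAuthFile when the check fails.
  const { file, teamId } = await authFile({ req, authToken: true, fileId, per: 'owner' });
  return { file, teamId };
}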

View File

@ -1,40 +0,0 @@
import Papa from 'papaparse';
import { readFileRawText } from './rawText';
/**
* read csv to json
* @response {
* header: string[],
* data: string[][]
* }
*/
export const readCsvContent = async ({ file }: { file: File }) => {
try {
const { rawText: textArr } = await readFileRawText(file);
const csvArr = Papa.parse(textArr).data as string[][];
if (csvArr.length === 0) {
throw new Error('Failed to parse csv');
}
const header = csvArr.shift() as string[];
// add title to data
const rawText = csvArr
.map((item) =>
item.map((value, index) => {
if (!header[index]) return value;
return `${header[index]}: ${value}`;
})
)
.flat()
.join('\n');
return {
rawText,
header,
data: csvArr.map((item) => item)
};
} catch (error) {
return Promise.reject('Failed to parse the csv file');
}
};

View File

@ -1,21 +0,0 @@
import { htmlStr2Md } from '../../string/markdown';
import { readFileRawText } from './rawText';
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
export const readHtmlFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const { rawText } = await readFileRawText(file);
const md = htmlStr2Md(rawText);
const simpleMd = await markdownProcess({
rawText: md,
uploadImgController
});
return { rawText: simpleMd };
};

View File

@ -1,49 +0,0 @@
import { loadFile2Buffer } from '../utils';
import { readCsvContent } from './csv';
import { readHtmlFile } from './html';
import { readMdFile } from './md';
import { readPdfFile } from './pdf';
import { readFileRawText } from './rawText';
import { readWordFile } from './word';
export const readFileRawContent = async ({
file,
uploadBase64Controller
}: {
file: File;
uploadBase64Controller?: (base64: string) => Promise<string>;
}): Promise<{
rawText: string;
}> => {
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
switch (extension) {
case 'txt':
return readFileRawText(file);
case 'md':
return readMdFile({
file,
uploadImgController: uploadBase64Controller
});
case 'html':
return readHtmlFile({
file,
uploadImgController: uploadBase64Controller
});
case 'csv':
return readCsvContent({ file });
case 'pdf':
const pdf = await loadFile2Buffer({ file });
return readPdfFile({ pdf });
case 'docx':
return readWordFile({
file,
uploadImgController: uploadBase64Controller
});
default:
return {
rawText: ''
};
}
};

View File

@ -1,17 +0,0 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { readFileRawText } from './rawText';
export const readMdFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const { rawText: md } = await readFileRawText(file);
const simpleMd = await markdownProcess({
rawText: md,
uploadImgController
});
return { rawText: simpleMd };
};

View File

@ -1,36 +0,0 @@
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
/**
* read file raw text
*/
export const readFileRawText = (file: File) => {
return new Promise<{ rawText: string }>((resolve, reject) => {
try {
const reader = new FileReader();
reader.onload = () => {
//@ts-ignore
const encode = detectFileEncoding(reader.result);
// Read the file again, this time with the detected encoding
const reader2 = new FileReader();
reader2.onload = () => {
resolve({
rawText: reader2.result as string
});
};
reader2.onerror = (err) => {
console.log('Error reading file with detected encoding:', err);
reject('Read file error with detected encoding');
};
reader2.readAsText(file, encode);
};
reader.onerror = (err) => {
console.log('error txt read:', err);
reject('Read file error');
};
reader.readAsBinaryString(file);
} catch (error) {
reject(error);
}
});
};

View File

@ -1,28 +0,0 @@
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { htmlStr2Md } from '../../string/markdown';
import { loadFile2Buffer } from '../utils';
import mammoth from 'mammoth';
export const readWordFile = async ({
file,
uploadImgController
}: {
file: File;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const buffer = await loadFile2Buffer({ file });
const { value: html } = await mammoth.convertToHtml({
arrayBuffer: buffer
});
const md = htmlStr2Md(html);
const rawText = await markdownProcess({
rawText: md,
uploadImgController: uploadImgController
});
return {
rawText
};
};

View File

@ -101,6 +101,7 @@ export const iconPaths = {
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'), 'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'), 'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'), 'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
'core/dataset/splitLight': () => import('./icons/core/dataset/splitLight.svg'),
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'), 'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'), 'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
'core/modules/basicNode': () => import('./icons/core/modules/basicNode.svg'), 'core/modules/basicNode': () => import('./icons/core/modules/basicNode.svg'),

View File

@ -0,0 +1,6 @@
<svg t="1711938287623" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg"
p-id="5143">
<path
d="M153.6 153.6h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 1 1 0-102.4z m0 614.4h716.8a51.2 51.2 0 0 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m0-307.2h131.6352a51.2 51.2 0 1 1 0 102.4H153.6a51.2 51.2 0 0 1 0-102.4z m292.5568 0h131.6864a51.2 51.2 0 0 1 0 102.4H446.1568a51.2 51.2 0 0 1 0-102.4z m292.608 0H870.4a51.2 51.2 0 0 1 0 102.4h-131.6352a51.2 51.2 0 0 1 0-102.4z"
p-id="5144"></path>
</svg>


View File

@ -0,0 +1,70 @@
import React from 'react';
import MyIcon from '../Icon';
import {
Drawer,
DrawerBody,
DrawerHeader,
DrawerOverlay,
DrawerContent,
DrawerCloseButton,
DrawerContentProps,
Flex,
Image
} from '@chakra-ui/react';
import { useLoading } from '../../../hooks/useLoading';
type Props = DrawerContentProps & {
onClose: () => void;
iconSrc?: string;
title?: any;
isLoading?: boolean;
};
const MyRightDrawer = ({
onClose,
iconSrc,
title,
maxW = ['90vw', '30vw'],
children,
isLoading,
...props
}: Props) => {
const { Loading } = useLoading();
return (
<Drawer isOpen placement="right" onClose={onClose}>
<DrawerOverlay />
<DrawerContent
maxW={maxW}
{...props}
h={'94%'}
mt={'2%'}
borderLeftRadius={'lg'}
overflow={'hidden'}
>
<DrawerCloseButton />
<DrawerHeader>
<Flex alignItems={'center'} pr={2}>
{iconSrc && (
<>
{iconSrc.startsWith('/') ? (
<Image mr={3} objectFit={'contain'} alt="" src={iconSrc} w={'20px'} />
) : (
<MyIcon mr={3} name={iconSrc as any} w={'20px'} />
)}
</>
)}
{title}
</Flex>
<DrawerCloseButton zIndex={1} />
</DrawerHeader>
<DrawerBody>
{children}
<Loading loading={isLoading} fixed={false} />
</DrawerBody>
</DrawerContent>
</Drawer>
);
};
export default MyRightDrawer;
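MyRightDrawer wraps Chakra's Drawer into a right-hand panel with an optional icon/title header and a built-in loading overlay. A minimal usage sketch (the import path is an assumption):

import React, { useState } from 'react';
import MyRightDrawer from './MyRightDrawer'; // path is an assumption

const Example = () => {
  const [open, setOpen] = useState(true);
  if (!open) return null;
  return (
    <MyRightDrawer onClose={() => setOpen(false)} title="Import file" isLoading={false}>
      <p>drawer body</p>
    </MyRightDrawer>
  );
};

export default Example;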

View File

@ -2,6 +2,8 @@ import React from 'react';
import { Box, Flex, useTheme, Grid, type GridProps } from '@chakra-ui/react'; import { Box, Flex, useTheme, Grid, type GridProps } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import MyTooltip from '../MyTooltip'; import MyTooltip from '../MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import QuestionTip from '../MyTooltip/QuestionTip';
// @ts-ignore // @ts-ignore
interface Props extends GridProps { interface Props extends GridProps {
@ -36,58 +38,59 @@ const LeftRadio = ({
return ( return (
<Grid gridGap={[3, 5]} fontSize={['sm', 'md']} {...props}> <Grid gridGap={[3, 5]} fontSize={['sm', 'md']} {...props}>
{list.map((item) => ( {list.map((item) => (
<MyTooltip key={item.value} label={item.tooltip}> <Flex
<Flex alignItems={item.desc ? align : 'center'}
alignItems={item.desc ? align : 'center'} key={item.value}
cursor={'pointer'} cursor={'pointer'}
userSelect={'none'} userSelect={'none'}
px={px} px={px}
py={py} py={py}
border={theme.borders.sm} border={theme.borders.sm}
borderWidth={'1px'} borderWidth={'1px'}
borderRadius={'md'} borderRadius={'md'}
position={'relative'} position={'relative'}
{...(value === item.value {...(value === item.value
? { ? {
borderColor: 'primary.400', borderColor: 'primary.400',
bg: activeBg, bg: activeBg,
boxShadow: 'focus' boxShadow: 'focus'
}
: {
bg: defaultBg,
_hover: {
borderColor: 'primary.300'
} }
: { })}
bg: defaultBg, onClick={() => onChange(item.value)}
_hover: { >
borderColor: 'primary.300' <Box
} w={'18px'}
})} h={'18px'}
onClick={() => onChange(item.value)} borderWidth={'2.4px'}
borderColor={value === item.value ? 'primary.015' : 'transparent'}
borderRadius={'50%'}
mr={3}
> >
<Box <Flex
w={'18px'} w={'100%'}
h={'18px'} h={'100%'}
borderWidth={'2.4px'} borderWidth={'1px'}
borderColor={value === item.value ? 'primary.015' : 'transparent'} borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
bg={value === item.value ? 'primary.1' : 'transparent'}
borderRadius={'50%'} borderRadius={'50%'}
mr={3} alignItems={'center'}
justifyContent={'center'}
> >
<Flex <Box
w={'100%'} w={'5px'}
h={'100%'} h={'5px'}
borderWidth={'1px'}
borderColor={value === item.value ? 'primary.600' : 'borderColor.high'}
bg={value === item.value ? 'primary.1' : 'transparent'}
borderRadius={'50%'} borderRadius={'50%'}
alignItems={'center'} bg={value === item.value ? 'primary.600' : 'transparent'}
justifyContent={'center'} ></Box>
> </Flex>
<Box </Box>
w={'5px'} <Box flex={'1 0 0'}>
h={'5px'} <Flex alignItems={'center'}>
borderRadius={'50%'}
bg={value === item.value ? 'primary.600' : 'transparent'}
></Box>
</Flex>
</Box>
<Box flex={'1 0 0'}>
<Box <Box
color={'myGray.900'} color={'myGray.900'}
fontWeight={item.desc ? '500' : 'normal'} fontWeight={item.desc ? '500' : 'normal'}
@ -95,15 +98,16 @@ const LeftRadio = ({
> >
{typeof item.title === 'string' ? t(item.title) : item.title} {typeof item.title === 'string' ? t(item.title) : item.title}
</Box> </Box>
{!!item.desc && ( {!!item.tooltip && <QuestionTip label={item.tooltip} ml={1} color={'myGray.600'} />}
<Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}> </Flex>
{t(item.desc)} {!!item.desc && (
</Box> <Box fontSize={'xs'} color={'myGray.500'} lineHeight={1.2}>
)} {t(item.desc)}
{item?.children} </Box>
</Box> )}
</Flex> {item?.children}
</MyTooltip> </Box>
</Flex>
))} ))}
</Grid> </Grid>
); );

View File

@ -12,31 +12,31 @@
"@emotion/styled": "^11.11.0", "@emotion/styled": "^11.11.0",
"@fastgpt/global": "workspace:*", "@fastgpt/global": "workspace:*",
"@fingerprintjs/fingerprintjs": "^4.2.1", "@fingerprintjs/fingerprintjs": "^4.2.1",
"@lexical/react": "0.12.6",
"@lexical/text": "0.12.6",
"@lexical/utils": "0.12.6",
"@monaco-editor/react": "^4.6.0", "@monaco-editor/react": "^4.6.0",
"mammoth": "^1.6.0", "@tanstack/react-query": "^4.24.10",
"date-fns": "2.30.0",
"dayjs": "^1.11.7",
"i18next": "23.10.0", "i18next": "23.10.0",
"joplin-turndown-plugin-gfm": "^1.0.12", "joplin-turndown-plugin-gfm": "^1.0.12",
"lexical": "0.12.6",
"lodash": "^4.17.21",
"mammoth": "^1.6.0",
"next-i18next": "15.2.0", "next-i18next": "15.2.0",
"papaparse": "^5.4.1",
"pdfjs-dist": "4.0.269", "pdfjs-dist": "4.0.269",
"react": "18.2.0", "react": "18.2.0",
"react-day-picker": "^8.7.1",
"react-dom": "18.2.0", "react-dom": "18.2.0",
"react-i18next": "13.5.0", "react-i18next": "13.5.0",
"turndown": "^7.1.2", "turndown": "^7.1.2"
"lexical": "0.12.6",
"@lexical/react": "0.12.6",
"papaparse": "^5.4.1",
"@lexical/utils": "0.12.6",
"@lexical/text": "0.12.6",
"date-fns": "2.30.0",
"react-day-picker": "^8.7.1",
"lodash": "^4.17.21",
"@tanstack/react-query": "^4.24.10",
"dayjs": "^1.11.7"
}, },
"devDependencies": { "devDependencies": {
"@types/lodash": "^4.14.191", "@types/lodash": "^4.14.191",
"@types/react": "18.2.0",
"@types/papaparse": "^5.3.7", "@types/papaparse": "^5.3.7",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.0", "@types/react-dom": "18.2.0",
"@types/turndown": "^5.0.4" "@types/turndown": "^5.0.4"
} }

pnpm-lock.yaml (generated, 334 changed lines)
View File

@ -99,6 +99,9 @@ importers:
'@node-rs/jieba': '@node-rs/jieba':
specifier: 1.10.0 specifier: 1.10.0
version: 1.10.0 version: 1.10.0
'@xmldom/xmldom':
specifier: ^0.8.10
version: 0.8.10
axios: axios:
specifier: ^1.5.1 specifier: ^1.5.1
version: 1.6.8 version: 1.6.8
@ -114,15 +117,24 @@ importers:
dayjs: dayjs:
specifier: ^1.11.7 specifier: ^1.11.7
version: 1.11.10 version: 1.11.10
decompress:
specifier: ^4.2.1
version: 4.2.1
encoding: encoding:
specifier: ^0.1.13 specifier: ^0.1.13
version: 0.1.13 version: 0.1.13
file-type:
specifier: ^19.0.0
version: 19.0.0
json5: json5:
specifier: ^2.2.3 specifier: ^2.2.3
version: 2.2.3 version: 2.2.3
jsonwebtoken: jsonwebtoken:
specifier: ^9.0.2 specifier: ^9.0.2
version: 9.0.2 version: 9.0.2
mammoth:
specifier: ^1.6.0
version: 1.7.0
mongoose: mongoose:
specifier: ^7.0.2 specifier: ^7.0.2
version: 7.6.10 version: 7.6.10
@ -138,6 +150,15 @@ importers:
node-cron: node-cron:
specifier: ^3.0.3 specifier: ^3.0.3
version: 3.0.3 version: 3.0.3
node-xlsx:
specifier: ^0.23.0
version: 0.23.0
papaparse:
specifier: 5.4.1
version: 5.4.1
pdfjs-dist:
specifier: 4.0.269
version: 4.0.269(encoding@0.1.13)
pg: pg:
specifier: ^8.10.0 specifier: ^8.10.0
version: 8.11.3 version: 8.11.3
@ -148,6 +169,9 @@ importers:
'@types/cookie': '@types/cookie':
specifier: ^0.5.2 specifier: ^0.5.2
version: 0.5.4 version: 0.5.4
'@types/decompress':
specifier: ^4.2.7
version: 4.2.7
'@types/jsonwebtoken': '@types/jsonwebtoken':
specifier: ^9.0.3 specifier: ^9.0.3
version: 9.0.6 version: 9.0.6
@ -157,6 +181,9 @@ importers:
'@types/node-cron': '@types/node-cron':
specifier: ^3.0.11 specifier: ^3.0.11
version: 3.0.11 version: 3.0.11
'@types/papaparse':
specifier: 5.3.7
version: 5.3.7
'@types/pg': '@types/pg':
specifier: ^8.6.6 specifier: ^8.6.6
version: 8.11.3 version: 8.11.3
@ -240,7 +267,7 @@ importers:
version: 5.4.1 version: 5.4.1
pdfjs-dist: pdfjs-dist:
specifier: 4.0.269 specifier: 4.0.269
version: 4.0.269 version: 4.0.269(encoding@0.1.13)
react: react:
specifier: 18.2.0 specifier: 18.2.0
version: 18.2.0 version: 18.2.0
@ -3789,10 +3816,9 @@ packages:
yjs: 13.6.14 yjs: 13.6.14
dev: false dev: false
/@mapbox/node-pre-gyp@1.0.11: /@mapbox/node-pre-gyp@1.0.11(encoding@0.1.13):
resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==} resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==}
hasBin: true hasBin: true
requiresBuild: true
dependencies: dependencies:
detect-libc: 2.0.3 detect-libc: 2.0.3
https-proxy-agent: 5.0.1 https-proxy-agent: 5.0.1
@ -4522,6 +4548,10 @@ packages:
use-sync-external-store: 1.2.0(react@18.2.0) use-sync-external-store: 1.2.0(react@18.2.0)
dev: false dev: false
/@tokenizer/token@0.3.0:
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
dev: false
/@trysound/sax@0.2.0: /@trysound/sax@0.2.0:
resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==} resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==}
engines: {node: '>=10.13.0'} engines: {node: '>=10.13.0'}
@ -4737,6 +4767,12 @@ packages:
'@types/ms': 0.7.34 '@types/ms': 0.7.34
dev: false dev: false
/@types/decompress@4.2.7:
resolution: {integrity: sha512-9z+8yjKr5Wn73Pt17/ldnmQToaFHZxK0N1GHysuk/JIPT8RIdQeoInM01wWPgypRcvb6VH1drjuFpQ4zmY437g==}
dependencies:
'@types/node': 20.11.30
dev: true
/@types/estree@1.0.5: /@types/estree@1.0.5:
resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==} resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==}
dev: true dev: true
@ -4876,6 +4912,12 @@ packages:
'@types/node': 20.11.30 '@types/node': 20.11.30
dev: true dev: true
/@types/papaparse@5.3.7:
resolution: {integrity: sha512-f2HKmlnPdCvS0WI33WtCs5GD7X1cxzzS/aduaxSu3I7TbhWlENjSPs6z5TaB9K0J+BH1jbmqTaM+ja5puis4wg==}
dependencies:
'@types/node': 20.11.30
dev: true
/@types/parse-json@4.0.2: /@types/parse-json@4.0.2:
resolution: {integrity: sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==} resolution: {integrity: sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==}
@ -5550,6 +5592,13 @@ packages:
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
engines: {node: '>=8'} engines: {node: '>=8'}
/bl@1.2.3:
resolution: {integrity: sha512-pvcNpa0UU69UT341rO6AYy4FVAIkUHuZXRIWbq+zHnsVcRzDDjIAhGuuYoi0d//cwIwtt4pkpKycWEfjdV+vww==}
dependencies:
readable-stream: 2.3.8
safe-buffer: 5.2.1
dev: false
/bluebird@3.4.7: /bluebird@3.4.7:
resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==} resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==}
dev: false dev: false
@ -5610,10 +5659,29 @@ packages:
engines: {node: '>=14.20.1'} engines: {node: '>=14.20.1'}
dev: false dev: false
/buffer-alloc-unsafe@1.1.0:
resolution: {integrity: sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg==}
dev: false
/buffer-alloc@1.2.0:
resolution: {integrity: sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow==}
dependencies:
buffer-alloc-unsafe: 1.1.0
buffer-fill: 1.0.0
dev: false
/buffer-crc32@0.2.13:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
dev: false
/buffer-equal-constant-time@1.0.1: /buffer-equal-constant-time@1.0.1:
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
dev: false dev: false
/buffer-fill@1.0.0:
resolution: {integrity: sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ==}
dev: false
/buffer-from@1.1.2: /buffer-from@1.1.2:
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
dev: false dev: false
@ -5623,6 +5691,13 @@ packages:
engines: {node: '>=4'} engines: {node: '>=4'}
dev: false dev: false
/buffer@5.7.1:
resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==}
dependencies:
base64-js: 1.5.1
ieee754: 1.2.1
dev: false
/busboy@1.6.0: /busboy@1.6.0:
resolution: {integrity: sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==} resolution: {integrity: sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==}
engines: {node: '>=10.16.0'} engines: {node: '>=10.16.0'}
@ -5665,12 +5740,12 @@ packages:
/caniuse-lite@1.0.30001599: /caniuse-lite@1.0.30001599:
resolution: {integrity: sha512-LRAQHZ4yT1+f9LemSMeqdMpMxZcc4RMWdj4tiFe3G8tNkWK+E58g+/tzotb5cU6TbcVJLr4fySiAW7XmxQvZQA==} resolution: {integrity: sha512-LRAQHZ4yT1+f9LemSMeqdMpMxZcc4RMWdj4tiFe3G8tNkWK+E58g+/tzotb5cU6TbcVJLr4fySiAW7XmxQvZQA==}
/canvas@2.11.2: /canvas@2.11.2(encoding@0.1.13):
resolution: {integrity: sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==} resolution: {integrity: sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==}
engines: {node: '>=6'} engines: {node: '>=6'}
requiresBuild: true requiresBuild: true
dependencies: dependencies:
'@mapbox/node-pre-gyp': 1.0.11 '@mapbox/node-pre-gyp': 1.0.11(encoding@0.1.13)
nan: 2.19.0 nan: 2.19.0
simple-get: 3.1.1 simple-get: 3.1.1
transitivePeerDependencies: transitivePeerDependencies:
@ -5909,6 +5984,10 @@ packages:
engines: {node: '>=16'} engines: {node: '>=16'}
dev: true dev: true
/commander@2.20.3:
resolution: {integrity: sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==}
dev: false
/commander@7.2.0: /commander@7.2.0:
resolution: {integrity: sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==} resolution: {integrity: sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==}
engines: {node: '>= 10'} engines: {node: '>= 10'}
@ -6469,6 +6548,59 @@ packages:
dev: false dev: false
optional: true optional: true
/decompress-tar@4.1.1:
resolution: {integrity: sha512-JdJMaCrGpB5fESVyxwpCx4Jdj2AagLmv3y58Qy4GE6HMVjWz1FeVQk1Ct4Kye7PftcdOo/7U7UKzYBJgqnGeUQ==}
engines: {node: '>=4'}
dependencies:
file-type: 5.2.0
is-stream: 1.1.0
tar-stream: 1.6.2
dev: false
/decompress-tarbz2@4.1.1:
resolution: {integrity: sha512-s88xLzf1r81ICXLAVQVzaN6ZmX4A6U4z2nMbOwobxkLoIIfjVMBg7TeguTUXkKeXni795B6y5rnvDw7rxhAq9A==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
file-type: 6.2.0
is-stream: 1.1.0
seek-bzip: 1.0.6
unbzip2-stream: 1.4.3
dev: false
/decompress-targz@4.1.1:
resolution: {integrity: sha512-4z81Znfr6chWnRDNfFNqLwPvm4db3WuZkqV+UgXQzSngG3CEKdBkw5jrv3axjjL96glyiiKjsxJG3X6WBZwX3w==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
file-type: 5.2.0
is-stream: 1.1.0
dev: false
/decompress-unzip@4.0.1:
resolution: {integrity: sha512-1fqeluvxgnn86MOh66u8FjbtJpAFv5wgCT9Iw8rcBqQcCo5tO8eiJw7NNTrvt9n4CRBVq7CstiS922oPgyGLrw==}
engines: {node: '>=4'}
dependencies:
file-type: 3.9.0
get-stream: 2.3.1
pify: 2.3.0
yauzl: 2.10.0
dev: false
/decompress@4.2.1:
resolution: {integrity: sha512-e48kc2IjU+2Zw8cTb6VZcJQ3lgVbS4uuB1TfCHbiZIP/haNXm+SVyhu+87jts5/3ROpd82GSVCoNs/z8l4ZOaQ==}
engines: {node: '>=4'}
dependencies:
decompress-tar: 4.1.1
decompress-tarbz2: 4.1.1
decompress-targz: 4.1.1
decompress-unzip: 4.0.1
graceful-fs: 4.2.11
make-dir: 1.3.0
pify: 2.3.0
strip-dirs: 2.1.0
dev: false
/deep-eql@4.1.3: /deep-eql@4.1.3:
resolution: {integrity: sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw==} resolution: {integrity: sha512-WaEtAOpRA1MQ0eohqZjpGD8zdI0Ovsm8mmFhaDN8dvDZzyoUMcYDnf5Y6iu7HTXxf8JDS23qWa4a+hKCDyOPzw==}
engines: {node: '>=6'} engines: {node: '>=6'}
@ -6712,6 +6844,12 @@ packages:
iconv-lite: 0.6.3 iconv-lite: 0.6.3
dev: false dev: false
/end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
dependencies:
once: 1.4.0
dev: false
/enhanced-resolve@5.16.0: /enhanced-resolve@5.16.0:
resolution: {integrity: sha512-O+QWCviPNSSLAD9Ucn8Awv+poAkqn3T1XY5/N7kR7rQO9yfSGWkYZDwpJ+iKF7B8rxaQKWngSqACpgzeapSyoA==} resolution: {integrity: sha512-O+QWCviPNSSLAD9Ucn8Awv+poAkqn3T1XY5/N7kR7rQO9yfSGWkYZDwpJ+iKF7B8rxaQKWngSqACpgzeapSyoA==}
engines: {node: '>=10.13.0'} engines: {node: '>=10.13.0'}
@ -7403,6 +7541,12 @@ packages:
dependencies: dependencies:
format: 0.2.2 format: 0.2.2
/fd-slicer@1.1.0:
resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==}
dependencies:
pend: 1.2.0
dev: false
/file-entry-cache@6.0.1: /file-entry-cache@6.0.1:
resolution: {integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==} resolution: {integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==}
engines: {node: ^10.12.0 || >=12.0.0} engines: {node: ^10.12.0 || >=12.0.0}
@ -7410,6 +7554,30 @@ packages:
flat-cache: 3.2.0 flat-cache: 3.2.0
dev: true dev: true
/file-type@19.0.0:
resolution: {integrity: sha512-s7cxa7/leUWLiXO78DVVfBVse+milos9FitauDLG1pI7lNaJ2+5lzPnr2N24ym+84HVwJL6hVuGfgVE+ALvU8Q==}
engines: {node: '>=18'}
dependencies:
readable-web-to-node-stream: 3.0.2
strtok3: 7.0.0
token-types: 5.0.1
dev: false
/file-type@3.9.0:
resolution: {integrity: sha512-RLoqTXE8/vPmMuTI88DAzhMYC99I8BWv7zYP4A1puo5HIjEJ5EX48ighy4ZyKMG9EDXxBgW6e++cn7d1xuFghA==}
engines: {node: '>=0.10.0'}
dev: false
/file-type@5.2.0:
resolution: {integrity: sha512-Iq1nJ6D2+yIO4c8HHg4fyVb8mAJieo1Oloy1mLLaB2PvezNedhBVm+QU7g0qM42aiMbRXTxKKwGD17rjKNJYVQ==}
engines: {node: '>=4'}
dev: false
/file-type@6.2.0:
resolution: {integrity: sha512-YPcTBDV+2Tm0VqjybVd32MHdlEGAtuxS3VAYsumFokDSMG+ROT5wawGlnHDoz7bfMcMDt9hxuXvXwoKUx2fkOg==}
engines: {node: '>=4'}
dev: false
/fill-range@7.0.1: /fill-range@7.0.1:
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==} resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
engines: {node: '>=8'} engines: {node: '>=8'}
@ -7550,6 +7718,10 @@ packages:
engines: {node: '>= 0.6'} engines: {node: '>= 0.6'}
dev: false dev: false
/fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
dev: false
/fs-minipass@2.1.0: /fs-minipass@2.1.0:
resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==} resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==}
engines: {node: '>= 8'} engines: {node: '>= 8'}
@ -7626,6 +7798,14 @@ packages:
engines: {node: '>=6'} engines: {node: '>=6'}
dev: false dev: false
/get-stream@2.3.1:
resolution: {integrity: sha512-AUGhbbemXxrZJRD5cDvKtQxLuYaIbNtDTK8YqupCI393Q2KSTreEsLUN3ZxAWFGiKTzL6nKuzfcIvieflUX9qA==}
engines: {node: '>=0.10.0'}
dependencies:
object-assign: 4.1.1
pinkie-promise: 2.0.1
dev: false
/get-stream@6.0.1: /get-stream@6.0.1:
resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==} resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==}
engines: {node: '>=10'} engines: {node: '>=10'}
@ -7978,6 +8158,10 @@ packages:
safer-buffer: 2.1.2 safer-buffer: 2.1.2
dev: false dev: false
/ieee754@1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: false
/ignore@5.3.1: /ignore@5.3.1:
resolution: {integrity: sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==} resolution: {integrity: sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==}
engines: {node: '>= 4'} engines: {node: '>= 4'}
@ -8178,6 +8362,10 @@ packages:
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
dev: true dev: true
/is-natural-number@4.0.1:
resolution: {integrity: sha512-Y4LTamMe0DDQIIAlaer9eKebAlDSV6huy+TWhJVPlzZh2o4tRP5SQWFlLn5N0To4mDD22/qdOq+veo1cSISLgQ==}
dev: false
/is-negative-zero@2.0.3: /is-negative-zero@2.0.3:
resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==} resolution: {integrity: sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
@ -8229,6 +8417,11 @@ packages:
call-bind: 1.0.7 call-bind: 1.0.7
dev: true dev: true
/is-stream@1.1.0:
resolution: {integrity: sha512-uQPm8kcs47jx38atAcWTVxyltQYoPT68y9aWYdV6yWXSyW8mzSat0TL6CiWdZeCdF3KrAvpVtnHbTv4RN+rqdQ==}
engines: {node: '>=0.10.0'}
dev: false
/is-stream@3.0.0: /is-stream@3.0.0:
resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==} resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@ -8704,6 +8897,13 @@ packages:
'@jridgewell/sourcemap-codec': 1.4.15 '@jridgewell/sourcemap-codec': 1.4.15
dev: true dev: true
/make-dir@1.3.0:
resolution: {integrity: sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==}
engines: {node: '>=4'}
dependencies:
pify: 3.0.0
dev: false
/make-dir@3.1.0: /make-dir@3.1.0:
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==} resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
engines: {node: '>=8'} engines: {node: '>=8'}
@ -9547,6 +9747,14 @@ packages:
/node-releases@2.0.14: /node-releases@2.0.14:
resolution: {integrity: sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==} resolution: {integrity: sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==}
/node-xlsx@0.23.0:
resolution: {integrity: sha512-r3KaSZSsSrK92rbPXnX/vDdxURmPPik0rjJ3A+Pybzpjyrk4G6WyGfj8JIz5dMMEpCmWVpmO4qoVPBxnpLv/8Q==}
engines: {node: '>=10.0.0'}
hasBin: true
dependencies:
xlsx: '@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz'
dev: false
/non-layered-tidy-tree-layout@2.0.2: /non-layered-tidy-tree-layout@2.0.2:
resolution: {integrity: sha512-gkXMxRzUH+PB0ax9dUN0yYF0S25BqeAYqhgMaLUFmpXLEk7Fcu8f4emJuOAY0V8kjDICxROIKsTAKsV/v355xw==} resolution: {integrity: sha512-gkXMxRzUH+PB0ax9dUN0yYF0S25BqeAYqhgMaLUFmpXLEk7Fcu8f4emJuOAY0V8kjDICxROIKsTAKsV/v355xw==}
dev: false dev: false
@ -9875,17 +10083,26 @@ packages:
resolution: {integrity: sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==} resolution: {integrity: sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==}
dev: true dev: true
/pdfjs-dist@4.0.269: /pdfjs-dist@4.0.269(encoding@0.1.13):
resolution: {integrity: sha512-jjWO56tcOjnmPqDf8PmXDeZ781AGvpHMYI3HhNtaFKTRXXPaD1ArSrhVe38/XsrIQJ0onISCND/vuXaWJkiDWw==} resolution: {integrity: sha512-jjWO56tcOjnmPqDf8PmXDeZ781AGvpHMYI3HhNtaFKTRXXPaD1ArSrhVe38/XsrIQJ0onISCND/vuXaWJkiDWw==}
engines: {node: '>=18'} engines: {node: '>=18'}
optionalDependencies: optionalDependencies:
canvas: 2.11.2 canvas: 2.11.2(encoding@0.1.13)
path2d-polyfill: 2.1.1 path2d-polyfill: 2.1.1
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
- supports-color - supports-color
dev: false dev: false
/peek-readable@5.0.0:
resolution: {integrity: sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==}
engines: {node: '>=14.16'}
dev: false
/pend@1.2.0:
resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
dev: false
/pg-cloudflare@1.1.1: /pg-cloudflare@1.1.1:
resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==} resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==}
requiresBuild: true requiresBuild: true
@ -9979,6 +10196,28 @@ packages:
hasBin: true hasBin: true
dev: true dev: true
/pify@2.3.0:
resolution: {integrity: sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==}
engines: {node: '>=0.10.0'}
dev: false
/pify@3.0.0:
resolution: {integrity: sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg==}
engines: {node: '>=4'}
dev: false
/pinkie-promise@2.0.1:
resolution: {integrity: sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw==}
engines: {node: '>=0.10.0'}
dependencies:
pinkie: 2.0.4
dev: false
/pinkie@2.0.4:
resolution: {integrity: sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg==}
engines: {node: '>=0.10.0'}
dev: false
/pkg-types@1.0.3: /pkg-types@1.0.3:
resolution: {integrity: sha512-nN7pYi0AQqJnoLPC9eHFQ8AcyaixBUOwvqc5TDnIKCMEE6I0y8P7OKA7fPexsXGCGxQDl/cmrLAp26LhcwxZ4A==} resolution: {integrity: sha512-nN7pYi0AQqJnoLPC9eHFQ8AcyaixBUOwvqc5TDnIKCMEE6I0y8P7OKA7fPexsXGCGxQDl/cmrLAp26LhcwxZ4A==}
dependencies: dependencies:
@ -10396,7 +10635,13 @@ packages:
string_decoder: 1.3.0 string_decoder: 1.3.0
util-deprecate: 1.0.2 util-deprecate: 1.0.2
dev: false dev: false
optional: true
/readable-web-to-node-stream@3.0.2:
resolution: {integrity: sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==}
engines: {node: '>=8'}
dependencies:
readable-stream: 3.6.2
dev: false
/readdirp@3.6.0: /readdirp@3.6.0:
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
@ -10715,6 +10960,13 @@ packages:
dependencies: dependencies:
loose-envify: 1.4.0 loose-envify: 1.4.0
/seek-bzip@1.0.6:
resolution: {integrity: sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ==}
hasBin: true
dependencies:
commander: 2.20.3
dev: false
/semver@6.3.1: /semver@6.3.1:
resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==}
hasBin: true hasBin: true
@ -11024,7 +11276,6 @@ packages:
dependencies: dependencies:
safe-buffer: 5.2.1 safe-buffer: 5.2.1
dev: false dev: false
optional: true
/strip-ansi@6.0.1: /strip-ansi@6.0.1:
resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
@ -11044,6 +11295,12 @@ packages:
engines: {node: '>=4'} engines: {node: '>=4'}
dev: true dev: true
/strip-dirs@2.1.0:
resolution: {integrity: sha512-JOCxOeKLm2CAS73y/U4ZeZPTkE+gNVCzKt7Eox84Iej1LT/2pTWYpZKJuxwQpvX1LiZb1xokNR7RLfuBAa7T3g==}
dependencies:
is-natural-number: 4.0.1
dev: false
/strip-final-newline@3.0.0: /strip-final-newline@3.0.0:
resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==} resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==}
engines: {node: '>=12'} engines: {node: '>=12'}
@ -11060,6 +11317,14 @@ packages:
js-tokens: 8.0.3 js-tokens: 8.0.3
dev: true dev: true
/strtok3@7.0.0:
resolution: {integrity: sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==}
engines: {node: '>=14.16'}
dependencies:
'@tokenizer/token': 0.3.0
peek-readable: 5.0.0
dev: false
/style-to-object@0.4.4: /style-to-object@0.4.4:
resolution: {integrity: sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==} resolution: {integrity: sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==}
dependencies: dependencies:
@ -11131,6 +11396,19 @@ packages:
engines: {node: '>=6'} engines: {node: '>=6'}
dev: true dev: true
/tar-stream@1.6.2:
resolution: {integrity: sha512-rzS0heiNf8Xn7/mpdSVVSMAWAoy9bfb1WOTYC78Z0UQKeKa/CWS8FOq0lKGNa8DWKAn9gxjCvMLYc5PGXYlK2A==}
engines: {node: '>= 0.8.0'}
dependencies:
bl: 1.2.3
buffer-alloc: 1.2.0
end-of-stream: 1.4.4
fs-constants: 1.0.0
readable-stream: 2.3.8
to-buffer: 1.1.1
xtend: 4.0.2
dev: false
/tar@6.2.0: /tar@6.2.0:
resolution: {integrity: sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==} resolution: {integrity: sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==}
engines: {node: '>=10'} engines: {node: '>=10'}
@ -11149,6 +11427,10 @@ packages:
resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==} resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==}
dev: true dev: true
/through@2.3.8:
resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==}
dev: false
/timezones-list@3.0.3: /timezones-list@3.0.3:
resolution: {integrity: sha512-C+Vdvvj2c1xB6pu81pOX8geo6mrk/QsudFVlTVQET7QQwu8WAIyhDNeCrK5grU7EMzmbKLWqz7uU6dN8fvQvPQ==} resolution: {integrity: sha512-C+Vdvvj2c1xB6pu81pOX8geo6mrk/QsudFVlTVQET7QQwu8WAIyhDNeCrK5grU7EMzmbKLWqz7uU6dN8fvQvPQ==}
dev: false dev: false
@ -11171,6 +11453,10 @@ packages:
engines: {node: '>=14.0.0'} engines: {node: '>=14.0.0'}
dev: true dev: true
/to-buffer@1.1.1:
resolution: {integrity: sha512-lx9B5iv7msuFYE3dytT+KE5tap+rNYw+K4jVkb9R/asAb+pbBSM17jtunHplhBe6RRJdZx3Pn2Jph24O32mOVg==}
dev: false
/to-fast-properties@2.0.0: /to-fast-properties@2.0.0:
resolution: {integrity: sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==} resolution: {integrity: sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==}
engines: {node: '>=4'} engines: {node: '>=4'}
@ -11190,6 +11476,14 @@ packages:
engines: {node: '>=0.6'} engines: {node: '>=0.6'}
dev: false dev: false
/token-types@5.0.1:
resolution: {integrity: sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==}
engines: {node: '>=14.16'}
dependencies:
'@tokenizer/token': 0.3.0
ieee754: 1.2.1
dev: false
/tr46@0.0.3: /tr46@0.0.3:
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
dev: false dev: false
@ -11369,6 +11663,13 @@ packages:
which-boxed-primitive: 1.0.2 which-boxed-primitive: 1.0.2
dev: true dev: true
/unbzip2-stream@1.4.3:
resolution: {integrity: sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==}
dependencies:
buffer: 5.7.1
through: 2.3.8
dev: false
/underscore@1.13.6: /underscore@1.13.6:
resolution: {integrity: sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==} resolution: {integrity: sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==}
dev: false dev: false
@ -11943,6 +12244,13 @@ packages:
engines: {node: '>= 14'} engines: {node: '>= 14'}
dev: true dev: true
/yauzl@2.10.0:
resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==}
dependencies:
buffer-crc32: 0.2.13
fd-slicer: 1.1.0
dev: false
/yjs@13.6.14: /yjs@13.6.14:
resolution: {integrity: sha512-D+7KcUr0j+vBCUSKXXEWfA+bG4UQBviAwP3gYBhkstkgwy5+8diOPMx0iqLIOxNo/HxaREUimZRxqHGAHCL2BQ==} resolution: {integrity: sha512-D+7KcUr0j+vBCUSKXXEWfA+bG4UQBviAwP3gYBhkstkgwy5+8diOPMx0iqLIOxNo/HxaREUimZRxqHGAHCL2BQ==}
engines: {node: '>=16.0.0', npm: '>=8.0.0'} engines: {node: '>=16.0.0', npm: '>=8.0.0'}
@ -12029,3 +12337,11 @@ packages:
/zwitch@2.0.4: /zwitch@2.0.4:
resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==} resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==}
dev: false dev: false
'@cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz':
resolution: {tarball: https://cdn.sheetjs.com/xlsx-0.19.3/xlsx-0.19.3.tgz}
name: xlsx
version: 0.19.3
engines: {node: '>=0.8'}
hasBin: true
dev: false
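The lockfile entries above pin node-xlsx (resolved against the SheetJS CDN tarball) for the new spreadsheet loader. A minimal sketch of how such a parser is typically driven; the file path, sheet naming, and CSV-style flattening below are illustrative assumptions, not code from this commit:

```ts
import xlsx from 'node-xlsx';
import { readFileSync } from 'fs';

// Parse every sheet of a workbook into a matrix of cell values.
// node-xlsx accepts a file path or a Buffer; the path here is a placeholder.
const sheets = xlsx.parse(readFileSync('/tmp/example.xlsx'));

// Flatten each sheet into CSV-like text, one line per row, prefixed by the sheet name.
const rawText = sheets
  .map(({ name, data }) => `# ${name}\n${data.map((row) => row.join(',')).join('\n')}`)
  .join('\n\n');

console.log(rawText.slice(0, 200));
```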

View File

@ -82,7 +82,7 @@
"name": "Embedding-2", "name": "Embedding-2",
"avatar": "/imgs/model/openai.svg", "avatar": "/imgs/model/openai.svg",
"charsPointsPrice": 0, "charsPointsPrice": 0,
"defaultToken": 700, "defaultToken": 512,
"maxToken": 3000, "maxToken": 3000,
"weight": 100, "weight": 100,
"dbConfig": {}, "dbConfig": {},

View File

@ -56,6 +56,7 @@
} }
}, },
"common": { "common": {
"Action": "Action",
"Add": "Add", "Add": "Add",
"Add New": "Add", "Add New": "Add",
"All": "All", "All": "All",
@ -79,6 +80,7 @@
"Create New": "Create", "Create New": "Create",
"Create Success": "Create Success", "Create Success": "Create Success",
"Create Time": "Create time", "Create Time": "Create time",
"Creating": "Creating",
"Custom Title": "Custom Title", "Custom Title": "Custom Title",
"Delete": "Delete", "Delete": "Delete",
"Delete Failed": "Delete Failed", "Delete Failed": "Delete Failed",
@ -191,6 +193,7 @@
"Empty file tip": "The file content is empty. The file may be unreadable or pure image file content.", "Empty file tip": "The file content is empty. The file may be unreadable or pure image file content.",
"File Content": "File Content", "File Content": "File Content",
"File Name": "File Name", "File Name": "File Name",
"File Size": "File Size",
"File content can not be empty": "File content can not be empty", "File content can not be empty": "File content can not be empty",
"Filename Can not Be Empty": "Filename Can not Be Empty", "Filename Can not Be Empty": "Filename Can not Be Empty",
"Read File Error": "Read file error", "Read File Error": "Read file error",
@ -198,6 +201,7 @@
"Select failed": "Select file failed", "Select failed": "Select file failed",
"Select file amount limit": "A maximum of {{max}} files can be selected", "Select file amount limit": "A maximum of {{max}} files can be selected",
"Select file amount limit 100": "You can select a maximum of 100 files at a time", "Select file amount limit 100": "You can select a maximum of 100 files at a time",
"Some file count exceeds limit": "The number of files exceeding {{maxCount}} has been automatically intercepted",
"Some file size exceeds limit": "Some files exceed: {{maxSize}}, have been filtered", "Some file size exceeds limit": "Some files exceed: {{maxSize}}, have been filtered",
"Support file type": "Support {{fileType}} files", "Support file type": "Support {{fileType}} files",
"Support max count": "A maximum of {{maxCount}} files are supported.", "Support max count": "A maximum of {{maxCount}} files are supported.",
@ -620,7 +624,7 @@
"file": "File", "file": "File",
"folder": "Folder", "folder": "Folder",
"import": { "import": {
"Auto mode Estimated Price Tips": "Enhanced processing calls the file processing model: {{price}} integral /1k Tokens", "Auto mode Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
"Auto process": "Auto", "Auto process": "Auto",
"Auto process desc": "Automatically set segmentation and preprocessing rules", "Auto process desc": "Automatically set segmentation and preprocessing rules",
"CSV Import": "CSV QA Import", "CSV Import": "CSV QA Import",
@ -642,7 +646,7 @@
"Data file progress": "Data upload progress", "Data file progress": "Data upload progress",
"Data process params": "Data process params", "Data process params": "Data process params",
"Down load csv template": "Down load csv template", "Down load csv template": "Down load csv template",
"Embedding Estimated Price Tips": "Index billing: {{price}}/1k Tokens", "Embedding Estimated Price Tips": "Use only the index model and consume a small amount of Tokens: {{price}} credits /1k Tokens",
"Estimated Price": "Estimated Price: : {{amount}}{{unit}}", "Estimated Price": "Estimated Price: : {{amount}}{{unit}}",
"Estimated Price Tips": "QA charges: {{charsPointsPrice}} points/1k Tokens", "Estimated Price Tips": "QA charges: {{charsPointsPrice}} points/1k Tokens",
"Estimated points": "About {{points}} points", "Estimated points": "About {{points}} points",
@ -657,15 +661,19 @@
"Import Failed": "Import Failed", "Import Failed": "Import Failed",
"Import Success Tip": "The {{num}} group data is imported successfully. Please wait for training.", "Import Success Tip": "The {{num}} group data is imported successfully. Please wait for training.",
"Import Tip": "This task cannot be terminated and takes some time to generate indexes. Please confirm the import. If the balance is insufficient, the unfinished task will be suspended and can continue after topping up.", "Import Tip": "This task cannot be terminated and takes some time to generate indexes. Please confirm the import. If the balance is insufficient, the unfinished task will be suspended and can continue after topping up.",
"Import success": "Import successful, please wait for training",
"Link name": "Link name", "Link name": "Link name",
"Link name placeholder": "Only static links are supported\nOne per line, up to 10 links at a time", "Link name placeholder": "Only static links are supported\nOne per line, up to 10 links at a time",
"Local file": "Local file", "Local file": "Local file",
"Local file desc": "Upload files in PDF, TXT, DOCX and other formats", "Local file desc": "Upload files in PDF, TXT, DOCX and other formats",
"Only Show First 50 Chunk": "Show only part", "Only Show First 50 Chunk": "Show only part",
"Preview chunks": "Chunks", "Predicted chunk": "Predicted chunk",
"Preview raw text": "Preview file text (max show 10000 words)", "Predicted chunk amount": "Predicted chunks:{{amount}}",
"Predicted total chars": "Predicted chars: {{total}}",
"Preview chunks": "Preview chunks",
"Preview raw text": "Preview file text (max show 3000 words)",
"Process way": "Process way", "Process way": "Process way",
"QA Estimated Price Tips": "QA billing: {{price}}/1k Tokens (including input and output)", "QA Estimated Price Tips": "Need to call the file processing model, need to consume more Tokens: {{price}} credits /1k Tokens",
"QA Import": "QA Split", "QA Import": "QA Split",
"QA Import Tip": "According to certain rules, the text is broken into a larger paragraph, and the AI is invoked to generate a question and answer pair for the paragraph.", "QA Import Tip": "According to certain rules, the text is broken into a larger paragraph, and the AI is invoked to generate a question and answer pair for the paragraph.",
"Re Preview": "RePreview", "Re Preview": "RePreview",
@ -680,8 +688,8 @@
"Total tokens": "Tokens", "Total tokens": "Tokens",
"Training mode": "Training mode", "Training mode": "Training mode",
"Upload data": "Upload data", "Upload data": "Upload data",
"Upload file progress": "File upload progress", "Upload file progress": "Upload state",
"Upload status": "Upload status", "Upload status": "Status",
"Upload success": "Upload success", "Upload success": "Upload success",
"Web link": "Web link", "Web link": "Web link",
"Web link desc": "Fetch static web content as a collection" "Web link desc": "Fetch static web content as a collection"
@ -1348,6 +1356,7 @@
"Pay error": "Pay error", "Pay error": "Pay error",
"Pay success": "Pay success", "Pay success": "Pay success",
"Plan expired time": "Plan expired time", "Plan expired time": "Plan expired time",
"Plan reset time": "Plan reset time",
"Standard Plan Detail": "Standard Plan Detail", "Standard Plan Detail": "Standard Plan Detail",
"To read plan": "Read plan", "To read plan": "Read plan",
"bill": { "bill": {

View File

@ -56,6 +56,7 @@
} }
}, },
"common": { "common": {
"Action": "操作",
"Add": "添加", "Add": "添加",
"Add New": "新增", "Add New": "新增",
"All": "全部", "All": "全部",
@ -79,6 +80,7 @@
"Create New": "新建", "Create New": "新建",
"Create Success": "创建成功", "Create Success": "创建成功",
"Create Time": "创建时间", "Create Time": "创建时间",
"Creating": "创建中",
"Custom Title": "自定义标题", "Custom Title": "自定义标题",
"Delete": "删除", "Delete": "删除",
"Delete Failed": "删除失败", "Delete Failed": "删除失败",
@ -191,6 +193,7 @@
"Empty file tip": "文件内容为空,可能该文件无法读取或为纯图片文件内容。", "Empty file tip": "文件内容为空,可能该文件无法读取或为纯图片文件内容。",
"File Content": "文件内容", "File Content": "文件内容",
"File Name": "文件名", "File Name": "文件名",
"File Size": "文件大小",
"File content can not be empty": "文件内容不能为空", "File content can not be empty": "文件内容不能为空",
"Filename Can not Be Empty": "文件名不能为空", "Filename Can not Be Empty": "文件名不能为空",
"Read File Error": "解析文件失败", "Read File Error": "解析文件失败",
@ -198,6 +201,7 @@
"Select failed": "选择文件异常", "Select failed": "选择文件异常",
"Select file amount limit": "最多选择 {{max}} 个文件", "Select file amount limit": "最多选择 {{max}} 个文件",
"Select file amount limit 100": "每次最多选择100个文件", "Select file amount limit 100": "每次最多选择100个文件",
"Some file count exceeds limit": "超出{{maxCount}}个文件,已自动截取",
"Some file size exceeds limit": "部分文件超出: {{maxSize}},已被过滤", "Some file size exceeds limit": "部分文件超出: {{maxSize}},已被过滤",
"Support file type": "支持 {{fileType}} 类型文件", "Support file type": "支持 {{fileType}} 类型文件",
"Support max count": "最多支持 {{maxCount}} 个文件。", "Support max count": "最多支持 {{maxCount}} 个文件。",
@ -622,7 +626,7 @@
"file": "文件", "file": "文件",
"folder": "目录", "folder": "目录",
"import": { "import": {
"Auto mode Estimated Price Tips": "增强处理需调用文件处理模型: {{price}}积分/1k Tokens", "Auto mode Estimated Price Tips": "需调用文件处理模型需要消耗较多Tokens: {{price}}积分/1k Tokens",
"Auto process": "自动", "Auto process": "自动",
"Auto process desc": "自动设置分割和预处理规则", "Auto process desc": "自动设置分割和预处理规则",
"CSV Import": "CSV 导入", "CSV Import": "CSV 导入",
@ -644,7 +648,7 @@
"Data file progress": "数据上传进度", "Data file progress": "数据上传进度",
"Data process params": "数据处理参数", "Data process params": "数据处理参数",
"Down load csv template": "点击下载 CSV 模板", "Down load csv template": "点击下载 CSV 模板",
"Embedding Estimated Price Tips": "索引计费: {{price}}积分/1k Tokens", "Embedding Estimated Price Tips": "仅使用索引模型消耗少量Tokens: {{price}}积分/1k Tokens",
"Estimated Price": "预估价格: {{amount}}{{unit}}", "Estimated Price": "预估价格: {{amount}}{{unit}}",
"Estimated Price Tips": "QA计费为\n输入: {{charsPointsPrice}}积分/1k Tokens", "Estimated Price Tips": "QA计费为\n输入: {{charsPointsPrice}}积分/1k Tokens",
"Estimated points": "预估消耗 {{points}} 积分", "Estimated points": "预估消耗 {{points}} 积分",
@ -659,15 +663,19 @@
"Import Failed": "导入文件失败", "Import Failed": "导入文件失败",
"Import Success Tip": "共成功导入 {{num}} 组数据,请耐心等待训练.", "Import Success Tip": "共成功导入 {{num}} 组数据,请耐心等待训练.",
"Import Tip": "该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。", "Import Tip": "该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。",
"Import success": "导入成功,请等待训练",
"Link name": "网络链接", "Link name": "网络链接",
"Link name placeholder": "仅支持静态链接,如果上传后数据为空,可能该链接无法被读取\n每行一个每次最多 10 个链接", "Link name placeholder": "仅支持静态链接,如果上传后数据为空,可能该链接无法被读取\n每行一个每次最多 10 个链接",
"Local file": "本地文件", "Local file": "本地文件",
"Local file desc": "上传 PDF, TXT, DOCX 等格式的文件", "Local file desc": "上传 PDF, TXT, DOCX 等格式的文件",
"Only Show First 50 Chunk": "仅展示部分", "Only Show First 50 Chunk": "仅展示部分",
"Preview chunks": "分段预览", "Predicted chunk": "预估分段",
"Preview raw text": "预览源文本最多展示10000字", "Predicted chunk amount": "预估分段:{{amount}}",
"Predicted total chars": "预估字数: {{total}}",
"Preview chunks": "预览分段最多5段",
"Preview raw text": "预览源文本最多3000字",
"Process way": "处理方式", "Process way": "处理方式",
"QA Estimated Price Tips": "QA计费为: {{price}}积分/1k Tokens(包含输入和输出)", "QA Estimated Price Tips": "需调用文件处理模型需要消耗较多Tokens: {{price}}积分/1k Tokens",
"QA Import": "QA拆分", "QA Import": "QA拆分",
"QA Import Tip": "根据一定规则,将文本拆成一段较大的段落,调用 AI 为该段落生成问答对。有非常高的检索精度,但是会丢失很多内容细节。", "QA Import Tip": "根据一定规则,将文本拆成一段较大的段落,调用 AI 为该段落生成问答对。有非常高的检索精度,但是会丢失很多内容细节。",
"Re Preview": "重新生成预览", "Re Preview": "重新生成预览",
@ -683,7 +691,7 @@
"Training mode": "训练模式", "Training mode": "训练模式",
"Upload data": "上传数据", "Upload data": "上传数据",
"Upload file progress": "文件上传进度", "Upload file progress": "文件上传进度",
"Upload status": "上传状态", "Upload status": "状态",
"Upload success": "上传成功", "Upload success": "上传成功",
"Web link": "网页链接", "Web link": "网页链接",
"Web link desc": "读取静态网页内容作为数据集" "Web link desc": "读取静态网页内容作为数据集"
@ -1350,6 +1358,7 @@
"Pay error": "支付失败", "Pay error": "支付失败",
"Pay success": "支付成功", "Pay success": "支付成功",
"Plan expired time": "套餐到期时间", "Plan expired time": "套餐到期时间",
"Plan reset time": "套餐重置时间",
"Standard Plan Detail": "套餐详情", "Standard Plan Detail": "套餐详情",
"To read plan": "查看套餐", "To read plan": "查看套餐",
"bill": { "bill": {
@ -1407,7 +1416,7 @@
"Standard update fail": "修改订阅套餐异常", "Standard update fail": "修改订阅套餐异常",
"Standard update success": "变更订阅套餐成功!", "Standard update success": "变更订阅套餐成功!",
"Sub plan": "订阅套餐", "Sub plan": "订阅套餐",
"Sub plan tip": "免费使用 FastGPT 或升级更高的套餐", "Sub plan tip": "免费使用 {{title}} 或升级更高的套餐",
"Team plan and usage": "套餐与用量", "Team plan and usage": "套餐与用量",
"Training weight": "训练优先级: {{weight}}", "Training weight": "训练优先级: {{weight}}",
"Update extra ai points": "额外AI积分", "Update extra ai points": "额外AI积分",

View File

@ -2,6 +2,7 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { import {
DatasetSearchModeEnum, DatasetSearchModeEnum,
DatasetTypeEnum, DatasetTypeEnum,
ImportDataSourceEnum,
TrainingModeEnum TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants'; } from '@fastgpt/global/core/dataset/constants';
import { import {
@ -67,3 +68,24 @@ export type SearchTestResponse = {
similarity: number; similarity: number;
usingQueryExtension: boolean; usingQueryExtension: boolean;
}; };
/* =========== training =========== */
export type PostPreviewFilesChunksProps = {
type: `${ImportDataSourceEnum}`;
sourceId: string;
chunkSize: number;
overlapRatio: number;
customSplitChar?: string;
};
export type PostPreviewFilesChunksResponse = {
fileId: string;
rawTextLength: number;
chunks: string[];
}[];
export type PostPreviewTableChunksResponse = {
fileId: string;
totalChunks: number;
chunks: { q: string; a: string; chunkIndex: number }[];
errorText?: string;
}[];
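A minimal sketch of how the web client might call the new chunk-preview endpoint with these params. The route path and the `data` wrapper produced by jsonRes are assumptions for illustration; only the request shape comes from the types above:

```ts
import type { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';

// Hypothetical client helper; the '/api/core/dataset/file/getPreviewChunks' path is assumed.
const postPreviewFileChunks = async (data: PostPreviewFilesChunksProps) => {
  const res = await fetch('/api/core/dataset/file/getPreviewChunks', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data)
  });
  // The handler responds with at most 5 { q, a } chunks wrapped by jsonRes.
  const { data: chunks } = (await res.json()) as { data: { q: string; a: string }[] };
  return chunks;
};

// Example: preview chunks for a locally uploaded file at a 512-char chunk size.
postPreviewFileChunks({
  type: ImportDataSourceEnum.fileLocal,
  sourceId: 'your-uploaded-file-id',
  chunkSize: 512,
  overlapRatio: 0.2
});
```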

View File

@ -1,5 +0,0 @@
/* ================= dataset ===================== */
/* ================= collection ===================== */
/* ================= data ===================== */

View File

@ -397,14 +397,22 @@ const PlanUsage = () => {
<Box fontWeight={'bold'} fontSize="xl"> <Box fontWeight={'bold'} fontSize="xl">
{t(planName)} {t(planName)}
</Box> </Box>
<Flex mt="2" color={'#485264'} fontSize="sm">
<Box>{t('support.wallet.Plan expired time')}:</Box> {isFreeTeam ? (
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box> <>
</Flex> <Flex mt="2" color={'#485264'} fontSize="sm">
{isFreeTeam && ( <Box>{t('support.wallet.Plan reset time')}:</Box>
<Box mt="2" color={'#485264'} fontSize="sm"> <Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
30使 </Flex>
</Box> <Box mt="2" color={'#485264'} fontSize="sm">
30使
</Box>
</>
) : (
<Flex mt="2" color={'#485264'} fontSize="sm">
<Box>{t('support.wallet.Plan expired time')}:</Box>
<Box ml={2}>{formatTime2YMD(standardPlan?.expiredTime)}</Box>
</Flex>
)} )}
</Box> </Box>
<Button onClick={() => router.push('/price')}> <Button onClick={() => router.push('/price')}>

View File

@ -2,51 +2,15 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { addLog } from '@fastgpt/service/common/system/log'; import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns'; import { addHours } from 'date-fns';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema'; import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
/*
1.
*/
let deleteImageAmount = 0; let deleteImageAmount = 0;
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
const {
startHour = 72,
endHour = 24,
limit = 10
} = req.body as { startHour?: number; endHour?: number; limit?: number };
await authCert({ req, authRoot: true });
await connectToDatabase();
// start: now - maxDay, end: now - 3 day
const start = addHours(new Date(), -startHour);
const end = addHours(new Date(), -endHour);
deleteImageAmount = 0;
await checkInvalid(start, end, limit);
jsonRes(res, {
data: deleteImageAmount
});
} catch (error) {
addLog.error(`check Invalid user error`, error);
jsonRes(res, {
code: 500,
error
});
}
}
export async function checkInvalid(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find( const images = await MongoImage.find(
{ {
createTime: { createTime: {
@ -86,3 +50,37 @@ export async function checkInvalid(start: Date, end: Date, limit = 50) {
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`); console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
} }
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
await authCert({ req, authRoot: true });
(async () => {
try {
console.log('执行脏数据清理任务');
const end = addHours(new Date(), -1);
const start = addHours(new Date(), -360 * 24);
await checkFiles(start, end);
await checkInvalidImg(start, end);
await checkInvalidCollection(start, end);
await checkInvalidVector(start, end);
console.log('执行脏数据清理任务完毕');
} catch (error) {
console.log('执行脏数据清理任务出错了');
}
})();
jsonRes(res, {
message: 'success'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}
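A sketch of triggering this cleanup handler manually. The route path and the root-key header name are assumptions based on the authRoot check above, not values shown in this commit:

```ts
// Hypothetical manual trigger; adjust host, path and root-key header to the actual deployment.
await fetch('https://your-fastgpt-host/api/admin/clearInvalidData', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    rootkey: process.env.ROOT_KEY ?? ''
  },
  body: JSON.stringify({})
});
```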

View File

@ -6,9 +6,52 @@ import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo'; import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles'; import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns'; import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection'; import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector'; import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
let deleteImageAmount = 0;
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find(
{
createTime: {
$gte: start,
$lte: end
},
'metadata.relatedId': { $exists: true }
},
'_id teamId metadata'
);
console.log('total images', images.length);
let index = 0;
for await (const image of images) {
try {
// 1. Check whether a matching dataset collection still exists
const collection = await MongoDatasetCollection.findOne(
{
teamId: image.teamId,
'metadata.relatedImgId': image.metadata?.relatedId
},
'_id'
);
if (!collection) {
await image.deleteOne();
deleteImageAmount++;
}
index++;
index % 100 === 0 && console.log(index);
} catch (error) {
console.log(error);
}
}
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
}
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ /* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) { export default async function handler(req: NextApiRequest, res: NextApiResponse) {

View File

@ -2,13 +2,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema'; import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants'; import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants';

View File

@ -0,0 +1,41 @@
/*
  Read the dataset file content from GridFS and respond with the first 3000 characters
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId, csvFormat } = req.body as { fileId: string; csvFormat?: boolean };
if (!fileId) {
throw new Error('fileId is empty');
}
const { teamId } = await authFile({ req, authToken: true, fileId });
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat
});
jsonRes(res, {
data: {
previewContent: rawText.slice(0, 3000),
totalLength: rawText.length
}
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
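A sketch of how the import UI might consume this preview route. The path is an assumption; the fileId/csvFormat body and the previewContent/totalLength payload mirror the handler above:

```ts
// Hypothetical client call; the '/api/core/dataset/file/getPreviewContent' path is assumed.
const getFilePreviewContent = async (fileId: string, csvFormat?: boolean) => {
  const res = await fetch('/api/core/dataset/file/getPreviewContent', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ fileId, csvFormat })
  });
  const { data } = (await res.json()) as {
    data: { previewContent: string; totalLength: number };
  };
  return data; // previewContent holds at most the first 3000 characters
};
```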

View File

@ -2,9 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { authFileToken } from '@fastgpt/service/support/permission/controller'; import { authFileToken } from '@fastgpt/service/support/permission/controller';
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller'; import {
getDownloadStream,
getFileById,
readFileEncode
} from '@fastgpt/service/common/file/gridfs/controller';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@ -18,8 +21,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
throw new Error('fileId is empty'); throw new Error('fileId is empty');
} }
const [file, encodeStream] = await Promise.all([ const [file, encoding, fileStream] = await Promise.all([
getFileById({ bucketName, fileId }), getFileById({ bucketName, fileId }),
readFileEncode({ bucketName, fileId }),
getDownloadStream({ bucketName, fileId }) getDownloadStream({ bucketName, fileId })
]); ]);
@ -27,24 +31,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return Promise.reject(CommonErrEnum.fileNotFound); return Promise.reject(CommonErrEnum.fileNotFound);
} }
// get encoding
let buffers: Buffer = Buffer.from([]);
for await (const chunk of encodeStream) {
buffers = Buffer.concat([buffers, chunk]);
if (buffers.length > 10) {
encodeStream.abort();
break;
}
}
const encoding = detectFileEncoding(buffers);
res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`); res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`);
res.setHeader('Cache-Control', 'public, max-age=3600'); res.setHeader('Cache-Control', 'public, max-age=3600');
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`); res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
const fileStream = await getDownloadStream({ bucketName, fileId });
fileStream.pipe(res); fileStream.pipe(res);
fileStream.on('error', () => { fileStream.on('error', () => {

View File

@ -4,24 +4,22 @@ import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller'; import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer'; import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: 500 * 1024 * 1024
});
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let filePaths: string[] = []; /* Creates the multer uploader */
const upload = getUploadModel({
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
});
const filePaths: string[] = [];
try { try {
const { teamId, tmbId } = await authCert({ req, authToken: true }); await connectToDatabase();
const { file, bucketName, metadata } = await upload.doUpload(req, res); const { file, bucketName, metadata } = await upload.doUpload(req, res);
filePaths = [file.path]; filePaths.push(file.path);
await connectToDatabase();
const { teamId, tmbId } = await authCert({ req, authToken: true });
if (!bucketName) { if (!bucketName) {
throw new Error('bucketName is empty'); throw new Error('bucketName is empty');
@ -46,6 +44,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
error error
}); });
} }
removeFilesByPaths(filePaths);
} }
export const config = { export const config = {

View File

@ -12,12 +12,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const { teamId } = await authChatCert({ req, authToken: true }); const { teamId } = await authChatCert({ req, authToken: true });
const data = await uploadMongoImg({ const imgId = await uploadMongoImg({
teamId, teamId,
...body ...body
}); });
jsonRes(res, { data }); jsonRes(res, { data: imgId });
} catch (error) { } catch (error) {
jsonRes(res, { jsonRes(res, {
code: 500, code: 500,

View File

@ -0,0 +1,112 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId, parentId, fileId } = req.body as FileIdCreateDatasetCollectionParams;
const trainingType = TrainingModeEnum.chunk;
try {
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: datasetId
});
// 1. read file
const { rawText, filename } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const { chunks = [] } = parseCsvTable2Chunks(rawText);
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
teamId,
tmbId,
name: filename,
parentId,
datasetId,
type: DatasetCollectionTypeEnum.file,
fileId,
// special metadata
trainingType,
chunkSize: 0,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
billId,
data: chunks.map((chunk, index) => ({
q: chunk.q,
a: chunk.a,
chunkIndex: index
})),
session
});
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
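For reference, a sketch of the request body this CSV-table route reads, based on the fields destructured from FileIdCreateDatasetCollectionParams above; the route path and ids are placeholders, not values from this commit:

```ts
// Hypothetical request; the handler above only reads datasetId, parentId and fileId.
await fetch('/api/core/dataset/collection/create/csvTable', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    datasetId: 'your-dataset-id',
    parentId: '', // optional parent folder id
    fileId: 'your-uploaded-csv-file-id'
  })
});
```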

View File

@ -1,94 +1,151 @@
import type { NextApiRequest, NextApiResponse } from 'next'; import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { delFileByFileIdList, uploadFile } from '@fastgpt/service/common/file/gridfs/controller'; import {
import { getUploadModel } from '@fastgpt/service/common/file/multer'; delFileByFileIdList,
readFileContent
} from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api'; import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller'; import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants'; import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
/** import { MongoImage } from '@fastgpt/service/common/file/image/schema';
* Creates the multer uploader import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
*/ import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
const upload = getUploadModel({ import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
maxSize: 500 * 1024 * 1024 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
}); import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let filePaths: string[] = []; const {
let fileId: string = ''; fileId,
const { datasetId } = req.query as { datasetId: string }; trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try { try {
await connectToDatabase(); await connectToDatabase();
const { teamId, tmbId } = await authDataset({ const { teamId, tmbId, dataset } = await authDataset({
req, req,
authToken: true, authToken: true,
authApiKey: true, authApiKey: true,
per: 'w', per: 'w',
datasetId datasetId: body.datasetId
}); });
const { file, bucketName, data } = await upload.doUpload<FileCreateDatasetCollectionParams>( // 1. read file
req, const { rawText, filename } = await readFileContent({
res
);
filePaths = [file.path];
if (!file || !bucketName) {
throw new Error('file is empty');
}
const { fileMetadata, collectionMetadata, ...collectionData } = data;
// upload file and create collection
fileId = await uploadFile({
teamId, teamId,
tmbId, bucketName: BucketNameEnum.dataset,
bucketName,
path: file.path,
filename: file.originalname,
contentType: file.mimetype,
metadata: fileMetadata
});
// create collection
const { _id: collectionId } = await createOneCollection({
...collectionData,
metadata: collectionMetadata,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
fileId fileId
}); });
// 2. split chunks
jsonRes(res, { const { chunks } = splitText2Chunks({
data: collectionId text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
}); });
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) { } catch (error) {
if (fileId) {
try {
await delFileByFileIdList({
fileIdList: [fileId],
bucketName: BucketNameEnum.dataset
});
} catch (error) {}
}
jsonRes(res, { jsonRes(res, {
code: 500, code: 500,
error error
}); });
} }
removeFilesByPaths(filePaths);
} }
export const config = {
api: {
bodyParser: false
}
};
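Step 7 above unsets expiredTime on related images so they no longer match the image TTL index introduced in this release. The index definition itself is outside this hunk; a sketch of what such a TTL index typically looks like, under that assumption:

```ts
import { connectionMongo } from '@fastgpt/service/common/mongo';

// Assumed shape of the image schema's TTL index: MongoDB removes a document shortly
// after its `expiredTime` passes, so unsetting the field (step 7) keeps the image alive.
const ImageSchema = new connectionMongo.Schema({
  teamId: connectionMongo.Schema.Types.ObjectId,
  expiredTime: Date,
  metadata: Object
});
ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
```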

View File

@ -19,6 +19,7 @@ import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller'; import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants'; import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model'; import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@ -55,9 +56,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
insertLen: predictDataLimitLength(trainingType, chunks) insertLen: predictDataLimitLength(trainingType, chunks)
}); });
// 3. create collection and training bill const createResult = await mongoSessionRun(async (session) => {
const [{ _id: collectionId }, { billId }] = await Promise.all([ // 3. create collection
createOneCollection({ const { _id: collectionId } = await createOneCollection({
...body, ...body,
teamId, teamId,
tmbId, tmbId,
@ -70,34 +71,44 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
qaPrompt, qaPrompt,
hashRawText: hashStr(text), hashRawText: hashStr(text),
rawTextLength: text.length rawTextLength: text.length,
}), session
createTrainingUsage({ });
// 4. create training bill
const { billId } = await createTrainingUsage({
teamId, teamId,
tmbId, tmbId,
appName: name, appName: name,
billSource: UsageSourceEnum.training, billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name, vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name agentModel: getLLMModel(dataset.agentModel)?.name,
}) session
]); });
// 4. push chunks to training queue // 5. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({ const insertResults = await pushDataListToTrainingQueue({
teamId, teamId,
tmbId, tmbId,
collectionId, datasetId: dataset._id,
trainingMode: trainingType, collectionId,
prompt: qaPrompt, agentModel: dataset.agentModel,
billId, vectorModel: dataset.vectorModel,
data: chunks.map((text, index) => ({ trainingMode: trainingType,
q: text, prompt: qaPrompt,
chunkIndex: index billId,
})) data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
})),
session
});
return { collectionId, results: insertResults };
}); });
jsonRes(res, { jsonRes(res, {
data: { collectionId, results: insertResults } data: createResult
}); });
} catch (err) { } catch (err) {
jsonRes(res, { jsonRes(res, {

View File

@ -15,7 +15,8 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
await connectToDatabase(); await connectToDatabase();
const { collectionId, data } = req.body as PushDatasetDataProps; const body = req.body as PushDatasetDataProps;
const { collectionId, data } = body;
if (!collectionId || !Array.isArray(data)) { if (!collectionId || !Array.isArray(data)) {
throw new Error('collectionId or data is empty'); throw new Error('collectionId or data is empty');
@ -42,9 +43,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
jsonRes<PushDatasetDataResponse>(res, { jsonRes<PushDatasetDataResponse>(res, {
data: await pushDataListToTrainingQueue({ data: await pushDataListToTrainingQueue({
...req.body, ...body,
teamId, teamId,
tmbId tmbId,
datasetId: collection.datasetId._id,
agentModel: collection.datasetId.agentModel,
vectorModel: collection.datasetId.vectorModel
}) })
}); });
} catch (err) { } catch (err) {

View File

@ -0,0 +1,80 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { type, sourceId, chunkSize, customSplitChar, overlapRatio } =
req.body as PostPreviewFilesChunksProps;
if (!sourceId) {
      throw new Error('sourceId is empty');
}
if (chunkSize > 30000) {
throw new Error('chunkSize is too large, should be less than 30000');
}
const { chunks } = await (async () => {
if (type === ImportDataSourceEnum.fileLocal) {
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
const fileId = String(file._id);
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat: true
});
        // split only a leading slice of the raw text; the response returns at most 5 preview chunks
const sliceRawText = 10 * chunkSize;
const { chunks } = splitText2Chunks({
text: rawText.slice(0, sliceRawText),
chunkLen: chunkSize,
overlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return {
chunks: chunks.map((item) => ({
q: item,
a: ''
}))
};
}
if (type === ImportDataSourceEnum.csvTable) {
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
const fileId = String(file._id);
const { rawText } = await readFileContent({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat: false
});
const { chunks } = parseCsvTable2Chunks(rawText);
return {
chunks: chunks || []
};
}
return { chunks: [] };
})();
jsonRes<{ q: string; a: string }[]>(res, {
data: chunks.slice(0, 5)
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}
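This new preview handler deliberately reads only a `10 * chunkSize` slice of the raw text and returns at most five `{ q, a }` chunks, so previews stay cheap for large files. The client side uses the `getPreviewChunks` helper shown later in this diff; the raw fetch below is only an illustrative sketch, and the route path is an assumption:

```ts
// Hypothetical client call; the real route path and request helper may differ.
type PreviewChunk = { q: string; a: string };

async function fetchPreviewChunks(params: {
  type: 'fileLocal' | 'csvTable';
  sourceId: string;
  chunkSize: number;
  overlapRatio: number;
  customSplitChar?: string;
}): Promise<PreviewChunk[]> {
  const res = await fetch('/api/core/dataset/file/getPreviewChunks', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(params)
  });
  const json = await res.json();
  return json.data as PreviewChunk[]; // at most 5 items, per chunks.slice(0, 5)
}
```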

View File

@ -32,7 +32,6 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput'; import MyInput from '@/components/MyInput';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
import { useRequest } from '@fastgpt/web/hooks/useRequest'; import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useLoading } from '@fastgpt/web/hooks/useLoading';
import { useRouter } from 'next/router'; import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore'; import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@/components/MyMenu'; import MyMenu from '@/components/MyMenu';
@ -62,11 +61,11 @@ import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type'; import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants'; import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants';
import MyBox from '@/components/common/MyBox'; import MyBox from '@/components/common/MyBox';
import { ImportDataSourceEnum } from './Import';
import { usePagination } from '@fastgpt/web/hooks/usePagination'; import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {}); const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
const FileSourceSelector = dynamic(() => import('./Import/sourceSelector/FileSourceSelector'), {}); const FileSourceSelector = dynamic(() => import('./Import/components/FileSourceSelector'), {});
const CollectionCard = () => { const CollectionCard = () => {
const BoxRef = useRef<HTMLDivElement>(null); const BoxRef = useRef<HTMLDivElement>(null);
@ -76,14 +75,14 @@ const CollectionCard = () => {
const { toast } = useToast(); const { toast } = useToast();
const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string }; const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string };
const { t } = useTranslation(); const { t } = useTranslation();
const { Loading } = useLoading();
const { isPc } = useSystemStore(); const { isPc } = useSystemStore();
const { userInfo } = useUserStore(); const { userInfo } = useUserStore();
const [searchText, setSearchText] = useState(''); const [searchText, setSearchText] = useState('');
const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore(); const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore();
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({ const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
content: t('dataset.Confirm to delete the file') content: t('dataset.Confirm to delete the file'),
type: 'delete'
}); });
const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({ const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({
content: t('core.dataset.collection.Start Sync Tip') content: t('core.dataset.collection.Start Sync Tip')
@ -452,7 +451,7 @@ const CollectionCard = () => {
query: { query: {
...router.query, ...router.query,
currentTab: TabEnum.import, currentTab: TabEnum.import,
source: ImportDataSourceEnum.tableLocal source: ImportDataSourceEnum.csvTable
} }
}) })
} }

View File

@ -1,6 +1,5 @@
import React, { useContext, useCallback, createContext, useState, useMemo, useEffect } from 'react'; import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
@ -8,6 +7,7 @@ import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { UseFormReturn, useForm } from 'react-hook-form'; import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants'; import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type'; import { ImportSourceItemType } from '@/web/core/dataset/type';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
type ChunkSizeFieldType = 'embeddingChunkSize'; type ChunkSizeFieldType = 'embeddingChunkSize';
export type FormType = { export type FormType = {
@ -29,14 +29,11 @@ type useImportStoreType = {
showPromptInput: boolean; showPromptInput: boolean;
sources: ImportSourceItemType[]; sources: ImportSourceItemType[];
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>; setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showRePreview: boolean;
totalChunkChars: number;
totalChunks: number;
chunkSize: number; chunkSize: number;
predictPoints: number; chunkOverlapRatio: number;
priceTip: string; priceTip: string;
uploadRate: number; uploadRate: number;
splitSources2Chunks: () => void; importSource: `${ImportDataSourceEnum}`;
}; };
const StateContext = createContext<useImportStoreType>({ const StateContext = createContext<useImportStoreType>({
processParamsForm: {} as any, processParamsForm: {} as any,
@ -49,23 +46,22 @@ const StateContext = createContext<useImportStoreType>({
showChunkInput: false, showChunkInput: false,
showPromptInput: false, showPromptInput: false,
chunkSizeField: 'embeddingChunkSize', chunkSizeField: 'embeddingChunkSize',
showRePreview: false,
totalChunkChars: 0,
totalChunks: 0,
chunkSize: 0, chunkSize: 0,
predictPoints: 0, chunkOverlapRatio: 0,
priceTip: '', priceTip: '',
uploadRate: 50, uploadRate: 50,
splitSources2Chunks: () => {} importSource: ImportDataSourceEnum.fileLocal
}); });
export const useImportStore = () => useContext(StateContext); export const useImportStore = () => useContext(StateContext);
const Provider = ({ const Provider = ({
importSource,
dataset, dataset,
parentId, parentId,
children children
}: { }: {
importSource: `${ImportDataSourceEnum}`;
dataset: DatasetItemType; dataset: DatasetItemType;
parentId?: string; parentId?: string;
children: React.ReactNode; children: React.ReactNode;
@ -86,7 +82,6 @@ const Provider = ({
const { t } = useTranslation(); const { t } = useTranslation();
const [sources, setSources] = useState<ImportSourceItemType[]>([]); const [sources, setSources] = useState<ImportSourceItemType[]>([]);
const [showRePreview, setShowRePreview] = useState(false);
// watch form // watch form
const mode = processParamsForm.watch('mode'); const mode = processParamsForm.watch('mode');
@ -154,68 +149,15 @@ const Provider = ({
const chunkSize = wayStaticPrams[way].chunkSize; const chunkSize = wayStaticPrams[way].chunkSize;
useEffect(() => { const value: useImportStoreType = {
setShowRePreview(true);
}, [mode, way, chunkSize, customSplitChar]);
const totalChunkChars = useMemo(
() => sources.reduce((sum, file) => sum + file.chunkChars, 0),
[sources]
);
const predictPoints = useMemo(() => {
const totalTokensPredict = totalChunkChars / 1000;
if (mode === TrainingModeEnum.auto) {
const price = totalTokensPredict * 1.3 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
if (mode === TrainingModeEnum.qa) {
const price = totalTokensPredict * 1.2 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
return +(totalTokensPredict * vectorModel.charsPointsPrice).toFixed(2);
}, [agentModel.charsPointsPrice, mode, totalChunkChars, vectorModel.charsPointsPrice]);
const totalChunks = useMemo(
() => sources.reduce((sum, file) => sum + file.chunks.length, 0),
[sources]
);
const splitSources2Chunks = useCallback(() => {
setSources((state) =>
state.map((file) => {
const { chunks, chars } = splitText2Chunks({
text: file.rawText,
chunkLen: chunkSize,
overlapRatio: selectModelStaticParam.chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return {
...file,
chunkChars: chars,
chunks: chunks.map((chunk, i) => ({
chunkIndex: i,
q: chunk,
a: ''
}))
};
})
);
setShowRePreview(false);
}, [chunkSize, customSplitChar, selectModelStaticParam.chunkOverlapRatio]);
const value = {
parentId, parentId,
processParamsForm, processParamsForm,
...selectModelStaticParam, ...selectModelStaticParam,
sources, sources,
setSources, setSources,
showRePreview,
totalChunkChars,
totalChunks,
chunkSize, chunkSize,
predictPoints,
splitSources2Chunks importSource
}; };
return <StateContext.Provider value={value}>{children}</StateContext.Provider>; return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
}; };
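After this refactor the provider no longer pre-splits chunks or predicts points; consumers read `importSource`, `chunkSize`, `chunkOverlapRatio`, and the raw `sources` list and derive anything else on demand. A minimal consumer sketch (field names come from the diff, the import path and markup are assumptions):

```tsx
// Sketch only: a hypothetical consumer of the slimmed-down import context.
import React from 'react';
import { useImportStore } from './Provider';

const ImportSummary = () => {
  const { importSource, chunkSize, chunkOverlapRatio, sources } = useImportStore();

  // No pre-split chunks live in the context any more, so only show
  // per-source metadata such as name and create status.
  return (
    <div>
      <p>{`source: ${importSource}, chunkSize: ${chunkSize}, overlap: ${chunkOverlapRatio}`}</p>
      <ul>
        {sources.map((s) => (
          <li key={s.id}>{`${s.sourceName} - ${s.createStatus}`}</li>
        ))}
      </ul>
    </div>
  );
};

export default ImportSummary;
```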

View File

@ -1,4 +1,4 @@
import React, { useEffect, useMemo, useRef, useState } from 'react'; import React, { useMemo, useRef, useState } from 'react';
import { import {
Box, Box,
Flex, Flex,
@ -21,11 +21,11 @@ import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants'; import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@/components/MyTooltip'; import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider'; import { useImportStore } from '../Provider';
import Tag from '@/components/Tag';
import { useSystemStore } from '@/web/common/system/useSystemStore'; import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal'; import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent'; import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import Preview from '../components/Preview'; import Preview from '../components/Preview';
import Tag from '@/components/Tag';
function DataProcess({ function DataProcess({
showPreviewChunks = true, showPreviewChunks = true,
@ -38,17 +38,11 @@ function DataProcess({
const { feConfigs } = useSystemStore(); const { feConfigs } = useSystemStore();
const { const {
processParamsForm, processParamsForm,
sources,
chunkSizeField, chunkSizeField,
minChunkSize, minChunkSize,
showChunkInput, showChunkInput,
showPromptInput, showPromptInput,
maxChunkSize, maxChunkSize,
totalChunkChars,
totalChunks,
predictPoints,
showRePreview,
splitSources2Chunks,
priceTip priceTip
} = useImportStore(); } = useImportStore();
const { getValues, setValue, register } = processParamsForm; const { getValues, setValue, register } = processParamsForm;
@ -69,16 +63,10 @@ function DataProcess({
}); });
}, [feConfigs?.isPlus]); }, [feConfigs?.isPlus]);
useEffect(() => {
if (showPreviewChunks) {
splitSources2Chunks();
}
}, []);
return ( return (
<Box h={'100%'} display={['block', 'flex']} gap={5}> <Box h={'100%'} display={['block', 'flex']} gap={5}>
<Box flex={'1 0 0'} maxW={'600px'}> <Box flex={'1 0 0'} minW={['auto', '540px']} maxW={'600px'}>
<Flex fontWeight={'bold'} alignItems={'center'}> <Flex alignItems={'center'}>
<MyIcon name={'common/settingLight'} w={'20px'} /> <MyIcon name={'common/settingLight'} w={'20px'} />
<Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box> <Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box>
</Flex> </Flex>
@ -273,34 +261,18 @@ function DataProcess({
}} }}
></LeftRadio> ></LeftRadio>
</Flex> </Flex>
{showPreviewChunks && ( <Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}> {feConfigs?.show_pay && (
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}> <MyTooltip label={priceTip}>
{t('core.dataset.Total chunks', { total: totalChunks })} <Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
</Tag> {priceTip}
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}> </Tag>
{t('core.Total chars', { total: totalChunkChars })} </MyTooltip>
</Tag>
{feConfigs?.show_pay && (
<MyTooltip label={priceTip}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.dataset.import.Estimated points', { points: predictPoints })}
</Tag>
</MyTooltip>
)}
</Flex>
)}
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
{showPreviewChunks && showRePreview && (
<Button variant={'primaryOutline'} onClick={splitSources2Chunks}>
{t('core.dataset.import.Re Preview')}
</Button>
)} )}
</Flex>
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
<Button <Button
onClick={() => { onClick={() => {
if (showRePreview) {
splitSources2Chunks();
}
goToNext(); goToNext();
}} }}
> >
@ -308,7 +280,9 @@ function DataProcess({
</Button> </Button>
</Flex> </Flex>
</Box> </Box>
<Preview sources={sources} showPreviewChunks={showPreviewChunks} /> <Box flex={'1 0 0'} w={'0'}>
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
{isOpenCustomPrompt && ( {isOpenCustomPrompt && (
<PromptTextarea <PromptTextarea

View File

@ -1,5 +1,4 @@
import React from 'react'; import React from 'react';
import { useImportStore } from '../Provider';
import Preview from '../components/Preview'; import Preview from '../components/Preview';
import { Box, Button, Flex } from '@chakra-ui/react'; import { Box, Button, Flex } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
@ -12,12 +11,11 @@ const PreviewData = ({
goToNext: () => void; goToNext: () => void;
}) => { }) => {
const { t } = useTranslation(); const { t } = useTranslation();
const { sources, setSources } = useImportStore();
return ( return (
<Flex flexDirection={'column'} h={'100%'}> <Flex flexDirection={'column'} h={'100%'}>
<Box flex={'1 0 0 '}> <Box flex={'1 0 0 '}>
<Preview showPreviewChunks={showPreviewChunks} sources={sources} /> <Preview showPreviewChunks={showPreviewChunks} />
</Box> </Box>
<Flex mt={2} justifyContent={'flex-end'}> <Flex mt={2} justifyContent={'flex-end'}>
<Button onClick={goToNext}>{t('common.Next Step')}</Button> <Button onClick={goToNext}>{t('common.Next Step')}</Button>

View File

@ -1,4 +1,4 @@
import React, { useEffect, useState } from 'react'; import React from 'react';
import { import {
Box, Box,
TableContainer, TableContainer,
@ -8,164 +8,109 @@ import {
Th, Th,
Td, Td,
Tbody, Tbody,
Progress,
Flex, Flex,
Button Button
} from '@chakra-ui/react'; } from '@chakra-ui/react';
import { useImportStore, type FormType } from '../Provider'; import { useImportStore, type FormType } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon'; import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest'; import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { chunksUpload, fileCollectionCreate } from '@/web/core/dataset/utils';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { useToast } from '@fastgpt/web/hooks/useToast'; import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router'; import { useRouter } from 'next/router';
import { TabEnum } from '../../../index'; import { TabEnum } from '../../../index';
import { postCreateDatasetLinkCollection, postDatasetCollection } from '@/web/core/dataset/api'; import {
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants'; postCreateDatasetCsvTableCollection,
import { checkTeamDatasetSizeLimit } from '@/web/support/user/team/api'; postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
} from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import Tag from '@/components/Tag';
const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => { const Upload = () => {
const { t } = useTranslation(); const { t } = useTranslation();
const { toast } = useToast(); const { toast } = useToast();
const router = useRouter(); const router = useRouter();
const { datasetDetail } = useDatasetStore(); const { datasetDetail } = useDatasetStore();
const { parentId, sources, processParamsForm, chunkSize, totalChunks, uploadRate } = const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
useImportStore(); useImportStore();
const [uploadList, setUploadList] = useState<
(ImportSourceItemType & {
uploadedFileRate: number;
uploadedChunksRate: number;
})[]
>([]);
const { handleSubmit } = processParamsForm; const { handleSubmit } = processParamsForm;
const { mutate: startUpload, isLoading } = useRequest({ const { mutate: startUpload, isLoading } = useRequest({
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => { mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
if (uploadList.length === 0) return; if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
await checkTeamDatasetSizeLimit(totalChunks);
let totalInsertion = 0;
// Batch create collection and upload chunks // Batch create collection and upload chunks
for await (const item of uploadList) { for await (const item of filterWaitingSources) {
// create collection setSources((state) =>
const collectionId = await (async () => { state.map((source) =>
const commonParams = { source.id === item.id
parentId, ? {
trainingType: mode, ...source,
datasetId: datasetDetail._id, createStatus: 'creating'
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
name: item.sourceName,
rawTextLength: item.rawText.length,
hashRawText: hashStr(item.rawText)
};
if (item.file) {
return fileCollectionCreate({
file: item.file,
data: {
...commonParams,
collectionMetadata: {
relatedImgId: item.id
} }
}, : source
percentListen: (e) => { )
setUploadList((state) => );
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: e
}
: uploadItem
)
);
}
});
} else if (item.link) {
const { collectionId } = await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: 100
}
: uploadItem
)
);
return collectionId;
} else if (item.rawText) {
// manual collection
return postDatasetCollection({
...commonParams,
type: DatasetCollectionTypeEnum.virtual
});
}
return '';
})();
if (!collectionId) continue; // create collection
if (item.link) continue; const commonParams = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
const billId = await postCreateTrainingUsage({ name: item.sourceName
name: item.sourceName, };
datasetId: datasetDetail._id if (importSource === ImportDataSourceEnum.fileLocal && item.dbFileId) {
}); await postCreateDatasetFileCollection({
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.fileLink && item.link) {
await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
} else if (importSource === ImportDataSourceEnum.fileCustom && item.rawText) {
// manual collection
await postCreateDatasetTextCollection({
...commonParams,
text: item.rawText
});
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
await postCreateDatasetCsvTableCollection({
...commonParams,
fileId: item.dbFileId
});
}
// upload chunks setSources((state) =>
const chunks = item.chunks; state.map((source) =>
const { insertLen } = await chunksUpload({ source.id === item.id
collectionId, ? {
billId, ...source,
trainingMode: mode, createStatus: 'finish'
chunks, }
rate: uploadRate, : source
onUploading: (e) => { )
setUploadList((state) => );
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedChunksRate: e
}
: uploadItem
)
);
},
prompt: qaPrompt
});
totalInsertion += insertLen;
} }
return totalInsertion;
}, },
onSuccess(num) { onSuccess() {
if (showPreviewChunks) { toast({
toast({ title: t('core.dataset.import.Import success'),
title: t('core.dataset.import.Import Success Tip', { num }), status: 'success'
status: 'success' });
});
} else {
toast({
title: t('core.dataset.import.Upload success'),
status: 'success'
});
}
// close import page // close import page
router.replace({ router.replace({
@ -175,21 +120,21 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
} }
}); });
}, },
onError() {
setSources((state) =>
state.map((source) =>
source.createStatus === 'creating'
? {
...source,
createStatus: 'waiting'
}
: source
)
);
},
errorToast: t('common.file.Upload failed') errorToast: t('common.file.Upload failed')
}); });
useEffect(() => {
setUploadList(
sources.map((item) => {
return {
...item,
uploadedFileRate: item.file ? 0 : -1,
uploadedChunksRate: 0
};
})
);
}, []);
return ( return (
<Box> <Box>
<TableContainer> <TableContainer>
@ -199,85 +144,35 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}> <Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Source name')} {t('core.dataset.import.Source name')}
</Th> </Th>
{showPreviewChunks ? ( <Th borderBottom={'none'} py={4}>
<> {t('core.dataset.import.Upload status')}
<Th borderBottom={'none'} py={4}> </Th>
{t('core.dataset.Chunk amount')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderRightRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Data file progress')}
</Th>
</>
) : (
<>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload status')}
</Th>
</>
)}
</Tr> </Tr>
</Thead> </Thead>
<Tbody> <Tbody>
{uploadList.map((item) => ( {sources.map((item) => (
<Tr key={item.id}> <Tr key={item.id}>
<Td display={'flex'} alignItems={'center'}> <Td>
<MyIcon name={item.icon as any} w={'16px'} mr={1} /> <Flex alignItems={'center'}>
{item.sourceName} <MyIcon name={item.icon as any} w={'16px'} mr={1} />
<Box whiteSpace={'wrap'} maxW={'30vw'}>
{item.sourceName}
</Box>
</Flex>
</Td>
<Td>
<Box display={'inline-block'}>
{item.createStatus === 'waiting' && (
<Tag colorSchema={'gray'}>{t('common.Waiting')}</Tag>
)}
{item.createStatus === 'creating' && (
<Tag colorSchema={'blue'}>{t('common.Creating')}</Tag>
)}
{item.createStatus === 'finish' && (
<Tag colorSchema={'green'}>{t('common.Finish')}</Tag>
)}
</Box>
</Td> </Td>
{showPreviewChunks ? (
<>
<Td>{item.chunks.length}</Td>
<Td>
{item.uploadedFileRate === -1 ? (
'-'
) : (
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
)}
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedChunksRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'purple'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedChunksRate}%`}
</Flex>
</Td>
</>
) : (
<>
<Td color={item.uploadedFileRate === 100 ? 'green.600' : 'myGray.600'}>
{item.uploadedFileRate === 100 ? t('common.Finish') : t('common.Waiting')}
</Td>
</>
)}
</Tr> </Tr>
))} ))}
</Tbody> </Tbody>
@ -286,8 +181,8 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Flex justifyContent={'flex-end'} mt={4}> <Flex justifyContent={'flex-end'} mt={4}>
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}> <Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
{uploadList.length > 0 {sources.length > 0
? `${t('core.dataset.import.Total files', { total: uploadList.length })} | ` ? `${t('core.dataset.import.Total files', { total: sources.length })} | `
: ''} : ''}
{t('core.dataset.import.Start upload')} {t('core.dataset.import.Start upload')}
</Button> </Button>
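The rewritten Upload step drops the per-file progress bars in favour of a simple per-source status machine: `waiting` → `creating` → `finish`, falling back to `waiting` when creation fails. The repeated `setSources((state) => state.map(...))` blocks in the diff could be factored into one helper; a sketch, with the helper name being hypothetical:

```ts
// Hypothetical helper; the diff inlines this mapping at each call site.
type CreateStatus = 'waiting' | 'creating' | 'finish';
type SourceLike = { id: string; createStatus: CreateStatus };

function setSourceStatus<T extends SourceLike>(
  sources: T[],
  id: string,
  createStatus: CreateStatus
): T[] {
  return sources.map((source) => (source.id === id ? { ...source, createStatus } : source));
}

// Usage inside the upload loop (sketch):
// setSources((state) => setSourceStatus(state, item.id, 'creating'));
// ...create the collection for `item`...
// setSources((state) => setSourceStatus(state, item.id, 'finish'));
```

Filtering on `createStatus === 'waiting'` before the loop is what lets a failed batch be retried without re-creating collections that already finished.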

View File

@ -0,0 +1,296 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useMemo, useState } from 'react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type SelectFileItemType = {
fileId: string;
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
selectFiles,
setSelectFiles,
onStartSelect,
onFinishSelect,
...props
}: {
fileType: string;
selectFiles: ImportSourceItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
onStartSelect: () => void;
onFinishSelect: () => void;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { feConfigs } = useSystemStore();
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
const { File, onOpen } = useSelectFile({
fileType,
multiple: true,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const isMaxSelected = useMemo(
() => selectFiles.length >= maxCount,
[maxCount, selectFiles.length]
);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
onStartSelect();
setSelectFiles((state) => {
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
const { fileId, file } = selectFile;
return {
id: fileId,
createStatus: 'waiting',
file,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
isUploading: true,
uploadedFileRate: 0
};
});
const results = formatFiles.concat(state).slice(0, maxCount);
return results;
});
try {
// upload file
await Promise.all(
files.map(async ({ fileId, file }) => {
const uploadFileId = await uploadFile2DB({
file,
bucketName: BucketNameEnum.dataset,
percentListen: (e) => {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
uploadedFileRate: e
}
: item
)
);
}
});
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
dbFileId: uploadFileId,
isUploading: false
}
: item
)
);
})
);
} catch (error) {
console.log(error);
}
onFinishSelect();
}
}
});
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
if (selectFiles.length + files.length > maxCount) {
files = files.slice(0, maxCount - selectFiles.length);
toast({
status: 'warning',
title: t('common.file.Some file count exceeds limit', { maxCount })
});
}
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxCount, maxSize, onSelectFile, selectFiles.length, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
fileId: getNanoid(),
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
{...(isMaxSelected
? {}
: {
cursor: 'pointer',
_hover: {
bg: 'primary.50',
borderColor: 'primary.600'
},
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
onDragEnter: handleDragEnter,
onDragOver: (e) => e.preventDefault(),
onDragLeave: handleDragLeave,
onDrop: handleDrop,
onClick: onOpen
})}
{...props}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
{isMaxSelected ? (
<>
<Box color={'myGray.500'} fontSize={'xs'}>
</Box>
</>
) : (
<>
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
)
}
/>
</>
)}
</MyBox>
);
};
export default React.memo(FileSelector);
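The new `FileSelector` above uploads files to GridFS as soon as they are picked (`uploadFile2DB`), recording `dbFileId` and `uploadedFileRate` on each item, so parents only hold the shared source list and react to the start/finish callbacks. This mirrors the SelectFile steps further down in this diff; the wrapper component below is a sketch only, with prop names taken from the component and everything else assumed:

```tsx
// Sketch of mounting the new FileSelector; styling and i18n copy omitted.
import React, { useState } from 'react';
import FileSelector from './FileSelector';
import { ImportSourceItemType } from '@/web/core/dataset/type';

const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';

const SelectFilesDemo = () => {
  const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>([]);
  const [uploading, setUploading] = useState(false);

  return (
    <>
      <FileSelector
        fileType={fileType}
        selectFiles={selectFiles}
        setSelectFiles={setSelectFiles}
        onStartSelect={() => setUploading(true)}
        onFinishSelect={() => setUploading(false)}
      />
      {/* once uploads settle, every picked file carries dbFileId and uploadedFileRate */}
      <button disabled={uploading || selectFiles.length === 0}>Next</button>
    </>
  );
};

export default SelectFilesDemo;
```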

View File

@ -3,9 +3,9 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody, ModalFooter, Button } from '@chakra-ui/react'; import { ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio'; import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { ImportDataSourceEnum } from '..';
import { useRouter } from 'next/router'; import { useRouter } from 'next/router';
import { TabEnum } from '../../..'; import { TabEnum } from '../../..';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const FileModeSelector = ({ onClose }: { onClose: () => void }) => { const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
const { t } = useTranslation(); const { t } = useTranslation();

View File

@ -1,132 +1,94 @@
import React, { useMemo, useState } from 'react'; import React, { useState } from 'react';
import { Box, Flex } from '@chakra-ui/react'; import { Box, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon'; import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import RowTabs from '@fastgpt/web/components/common/Tabs/RowTabs'; import { useImportStore } from '../Provider';
import MyMenu from '@/components/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type'; import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
enum PreviewListEnum { const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
chunks = 'chunks',
sources = 'sources'
}
const Preview = ({
sources,
showPreviewChunks
}: {
sources: ImportSourceItemType[];
showPreviewChunks: boolean;
}) => {
const { t } = useTranslation(); const { t } = useTranslation();
const [previewListType, setPreviewListType] = useState(
showPreviewChunks ? PreviewListEnum.chunks : PreviewListEnum.sources
);
const chunks = useMemo(() => { const { sources } = useImportStore();
const oneSourceChunkLength = Math.max(4, Math.floor(50 / sources.length)); const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
return sources const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
.map((source) =>
source.chunks.slice(0, oneSourceChunkLength).map((chunk, i) => ({
...chunk,
index: i + 1,
sourceName: source.sourceName,
sourceIcon: source.icon
}))
)
.flat();
}, [sources]);
return ( return (
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'} flex={'1 0 0'}> <Box h={'100%'} display={['block', 'flex']} flexDirection={'column'}>
<Box> <Flex alignItems={'center'}>
<RowTabs <MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
list={[ <Box fontSize={'lg'}>{t('core.dataset.import.Sources list')}</Box>
...(showPreviewChunks </Flex>
? [ <Box mt={3} flex={'1 0 0'} width={'100%'} overflow={'auto'}>
{ {sources.map((source) => (
icon: 'common/viewLight', <Flex
label: t('core.dataset.import.Preview chunks'), key={source.id}
value: PreviewListEnum.chunks bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
alignItems={'center'}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} w={0} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box fontSize={'xs'} color={'myGray.600'}>
<MyMenu
Button={
<IconButton
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
aria-label={''}
size={'sm'}
variant={'whitePrimary'}
/>
} }
] menuList={[
: []), {
{ label: (
icon: 'core/dataset/fileCollection', <Flex alignItems={'center'}>
label: t('core.dataset.import.Sources list'), <MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
value: PreviewListEnum.sources {t('core.dataset.import.Preview raw text')}
} </Flex>
]} ),
value={previewListType} onClick: () => setPreviewRawTextSource(source)
onChange={(e) => setPreviewListType(e as PreviewListEnum)} },
/> {
</Box> label: (
<Box mt={3} flex={'1 0 0'} overflow={'auto'}> <Flex alignItems={'center'}>
{previewListType === PreviewListEnum.chunks ? ( <MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
<> {t('core.dataset.import.Preview chunks')}
{chunks.map((chunk, i) => ( </Flex>
<Box ),
key={i} onClick: () => setPreviewChunkSource(source)
p={4} }
bg={'white'} ]}
mb={3} />
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
whiteSpace={'pre-wrap'}
>
<Flex mb={1} alignItems={'center'} fontSize={'sm'}>
<Box
flexShrink={0}
px={1}
color={'primary.600'}
borderWidth={'1px'}
borderColor={'primary.200'}
bg={'primary.50'}
borderRadius={'sm'}
>
# {chunk.index}
</Box>
<Flex ml={2} fontWeight={'bold'} alignItems={'center'} gap={1}>
<MyIcon name={chunk.sourceIcon as any} w={'14px'} />
{chunk.sourceName}
</Flex>
</Flex>
<Box fontSize={'xs'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
<Box color={'myGray.900'}>{chunk.q}</Box>
<Box color={'myGray.500'}>{chunk.a}</Box>
</Box>
</Box> </Box>
))} )}
</> </Flex>
) : ( ))}
<>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box>
{t('core.dataset.import.File chunk amount', { amount: source.chunks.length })}
</Box>
)}
</Flex>
))}
</>
)}
</Box> </Box>
{!!previewRawTextSource && (
<PreviewRawText
previewSource={previewRawTextSource}
onClose={() => setPreviewRawTextSource(undefined)}
/>
)}
{!!previewChunkSource && (
<PreviewChunks
previewSource={previewChunkSource}
onClose={() => setPreviewChunkSource(undefined)}
/>
)}
</Box> </Box>
); );
}; };

View File

@ -0,0 +1,95 @@
import React, { useMemo } from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewChunks = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
const { data = [], isLoading } = useQuery(
['previewSource'],
() => {
if (
importSource === ImportDataSourceEnum.fileLocal ||
importSource === ImportDataSourceEnum.csvTable ||
importSource === ImportDataSourceEnum.fileLink
) {
return getPreviewChunks({
type: importSource,
sourceId: previewSource.dbFileId || previewSource.link || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar')
});
} else if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const { chunks } = splitText2Chunks({
text: previewSource.rawText || '',
chunkLen: chunkSize,
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return chunks.map((chunk) => ({
q: chunk,
a: ''
}));
}
return [];
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
maxW={['90vw', '40vw']}
>
{data.map((item, index) => (
<Box
key={index}
whiteSpace={'pre-wrap'}
fontSize={'sm'}
p={4}
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
_notLast={{
mb: 2
}}
>
<Box color={'myGray.900'}>{item.q}</Box>
<Box color={'myGray.500'}>{item.a}</Box>
</Box>
))}
</MyRightDrawer>
);
};
export default React.memo(PreviewChunks);

View File

@ -1,28 +1,73 @@
import React from 'react'; import React from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal'; import { Box } from '@chakra-ui/react';
import { ModalBody } from '@chakra-ui/react'; import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
export type PreviewRawTextProps = { import { getPreviewFileContent } from '@/web/common/file/api';
icon: string; import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
title: string; import { useImportStore } from '../Provider';
rawText: string; import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
}; import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewRawText = ({ const PreviewRawText = ({
icon, previewSource,
title,
rawText,
onClose onClose
}: PreviewRawTextProps & { }: {
previewSource: ImportSourceItemType;
onClose: () => void; onClose: () => void;
}) => { }) => {
const { toast } = useToast();
const { importSource } = useImportStore();
const { data, isLoading } = useQuery(
['previewSource', previewSource?.dbFileId],
() => {
if (importSource === ImportDataSourceEnum.fileLocal && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: true
});
}
if (importSource === ImportDataSourceEnum.csvTable && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: false
});
}
if (importSource === ImportDataSourceEnum.fileCustom) {
return {
previewContent: (previewSource.rawText || '').slice(0, 3000)
};
}
return {
previewContent: ''
};
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
const rawText = data?.previewContent || '';
return ( return (
<MyModal isOpen onClose={onClose} iconSrc={icon} title={title}> <MyRightDrawer
<ModalBody whiteSpace={'pre-wrap'} overflowY={'auto'}> onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
>
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} fontSize={'sm'}>
{rawText} {rawText}
</ModalBody> </Box>
</MyModal> </MyRightDrawer>
); );
}; };
export default PreviewRawText; export default React.memo(PreviewRawText);

View File

@ -0,0 +1,119 @@
import React, { useState } from 'react';
import {
Flex,
TableContainer,
Table,
Thead,
Tr,
Th,
Td,
Tbody,
Progress,
IconButton
} from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@/components/MyTooltip';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
return files.length > 0 ? (
<>
<TableContainer mt={5}>
<Table variant={'simple'} fontSize={'sm'} draggable={false}>
<Thead draggable={false}>
<Tr bg={'myGray.100'} mb={2}>
<Th borderLeftRadius={'md'} borderBottom={'none'} py={4}>
{t('common.file.File Name')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('common.file.File Size')}
</Th>
<Th borderRightRadius={'md'} borderBottom={'none'} py={4}>
{t('common.Action')}
</Th>
</Tr>
</Thead>
<Tbody>
{files.map((item) => (
<Tr key={item.id}>
<Td>
<Flex alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
{item.sourceName}
</Flex>
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={(item.uploadedFileRate || 0) >= 100 ? 'green' : 'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
</Td>
<Td>{item.sourceSize}</Td>
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}
<IconButton
variant={'grayDanger'}
size={'sm'}
icon={<MyIcon name={'delete'} w={'14px'} />}
aria-label={''}
onClick={() => {
setFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
)}
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};

View File

@ -1,4 +1,4 @@
import React, { useEffect } from 'react'; import React, { useCallback, useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d'; import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
@ -19,7 +19,7 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<> <>
{activeStep === 0 && <CustomTextInput goToNext={goToNext} />} {activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />} {activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />} {activeStep === 2 && <Upload />}
</> </>
); );
}; };
@ -36,6 +36,24 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
} }
}); });
const onSubmit = useCallback(
(data: { name: string; value: string }) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
createStatus: 'waiting',
rawText: data.value,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
},
[goToNext, setSources]
);
useEffect(() => { useEffect(() => {
const source = sources[0]; const source = sources[0];
if (source) { if (source) {
@ -78,25 +96,7 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
/> />
</Box> </Box>
<Flex mt={5} justifyContent={'flex-end'}> <Flex mt={5} justifyContent={'flex-end'}>
<Button <Button onClick={handleSubmit((data) => onSubmit(data))}>{t('common.Next Step')}</Button>
onClick={handleSubmit((data) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
rawText: data.value,
chunks: [],
chunkChars: 0,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
})}
>
{t('common.Next Step')}
</Button>
</Flex> </Flex>
</Box> </Box>
); );

View File

@ -23,7 +23,7 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<> <>
{activeStep === 0 && <CustomLinkImport goToNext={goToNext} />} {activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />} {activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks={false} />} {activeStep === 2 && <Upload />}
</> </>
); );
}; };
@ -128,10 +128,8 @@ const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
setSources( setSources(
newLinkList.map((link) => ({ newLinkList.map((link) => ({
id: getNanoid(32), id: getNanoid(32),
createStatus: 'waiting',
link, link,
rawText: '',
chunks: [],
chunkChars: 0,
sourceName: link, sourceName: link,
icon: LinkCollectionIcon icon: LinkCollectionIcon
})) }))

View File

@ -1,41 +1,27 @@
import React, { useEffect, useMemo, useState } from 'react'; import React, { useCallback, useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d'; import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react'; import { Box, Button } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d'; import FileSelector from '../components/FileSelector';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { readFileRawContent } from '@fastgpt/web/common/file/read';
import { getUploadBase64ImgController } from '@/web/common/file/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import MyTooltip from '@/components/MyTooltip';
import type { PreviewRawTextProps } from '../components/PreviewRawText';
import { useImportStore } from '../Provider'; import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import Loading from '@fastgpt/web/components/common/MyLoading'; import Loading from '@fastgpt/web/components/common/MyLoading';
import { RenderUploadFiles } from '../components/RenderFiles';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), { const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} /> loading: () => <Loading fixed={false} />
}); });
const Upload = dynamic(() => import('../commonProgress/Upload')); const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewRawText = dynamic(() => import('../components/PreviewRawText'));
type FileItemType = ImportSourceItemType & { file: File }; const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
const fileType = '.txt, .docx, .csv, .pdf, .md, .html';
const maxSelectFileCount = 1000;
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => { const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return ( return (
<> <>
{activeStep === 0 && <SelectFile goToNext={goToNext} />} {activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />} {activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />} {activeStep === 2 && <Upload />}
</> </>
); );
}; };
@ -44,135 +30,47 @@ export default React.memo(FileLocal);
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) { const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation(); const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore(); const { sources, setSources } = useImportStore();
// @ts-ignore const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources); sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]); const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
const [previewRaw, setPreviewRaw] = useState<PreviewRawTextProps>();
useEffect(() => { useEffect(() => {
setSources(successFiles); setSources(successFiles);
}, [successFiles]); }, [setSources, successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({ const onclickNext = useCallback(() => {
mutationFn: async (files: SelectFileItemType[]) => { // filter uploaded files
{ setSelectFiles((state) => state.filter((item) => (item.uploadedFileRate || 0) >= 100));
for await (const selectFile of files) { goToNext();
const { file, folderPath } = selectFile; }, [goToNext]);
const relatedId = getNanoid(32);
const { rawText } = await (() => {
try {
return readFileRawContent({
file,
uploadBase64Controller: (base64Img) =>
getUploadBase64ImgController({
base64Img,
type: MongoImageTypeEnum.collectionImage,
metadata: {
relatedId
}
})
});
} catch (error) {
return { rawText: '' };
}
})();
const item: FileItemType = {
id: relatedId,
file,
rawText,
chunks: [],
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg: rawText.length === 0 ? t('common.file.Empty file tip') : ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, maxSelectFileCount);
return results;
});
}
}
}
});
return ( return (
<Box> <Box>
<FileSelector <FileSelector
isLoading={isLoading}
fileType={fileType} fileType={fileType}
multiple selectFiles={selectFiles}
maxCount={maxSelectFileCount} setSelectFiles={setSelectFiles}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024} onStartSelect={() => setUploading(true)}
onSelectFile={onSelectFile} onFinishSelect={() => setUploading(false)}
/> />
{/* render files */} {/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}> <RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
{selectFiles.map((item) => (
<MyTooltip key={item.id} label={t('core.dataset.import.Preview raw text')}>
<Flex
alignItems={'center'}
px={4}
py={3}
borderRadius={'md'}
bg={'myGray.100'}
cursor={'pointer'}
onClick={() =>
setPreviewRaw({
icon: item.icon,
title: item.sourceName,
rawText: item.rawText.slice(0, 10000)
})
}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
{item.rawText.length > 0 && (
<>,{t('common.Number of words', { amount: item.rawText.length })}</>
)}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={(e) => {
e.stopPropagation();
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
</MyTooltip>
))}
</Flex>
<Box textAlign={'right'}> <Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}> <Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
{selectFiles.length > 0 {selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | ` ? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''} : ''}
{t('common.Next Step')} {t('common.Next Step')}
</Button> </Button>
</Box> </Box>
{previewRaw && <PreviewRawText {...previewRaw} onClose={() => setPreviewRaw(undefined)} />}
</Box> </Box>
); );
}); });

View File

@ -1,108 +1,62 @@
import React, { useEffect, useMemo, useState } from 'react'; import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d'; import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react'; import { Box, Button } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d'; import FileSelector from '../components/FileSelector';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider'; import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils'; import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv'; import { RenderUploadFiles } from '../components/RenderFiles';
const PreviewData = dynamic(() => import('../commonProgress/PreviewData')); const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload')); const Upload = dynamic(() => import('../commonProgress/Upload'));
type FileItemType = ImportSourceItemType & { file: File };
const fileType = '.csv'; const fileType = '.csv';
const maxSelectFileCount = 1000;
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => { const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return ( return (
<> <>
{activeStep === 0 && <SelectFile goToNext={goToNext} />} {activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />} {activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />} {activeStep === 2 && <Upload />}
</> </>
); );
}; };
export default React.memo(FileLocal); export default React.memo(FileLocal);
const csvTemplate = `index,content const csvTemplate = `"第一列内容","第二列内容"
"必填内容","可选内容。CSV 中请注意内容不能包含双引号,双引号是列分割符号" "必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会讲第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。","" "结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`; "AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) { const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation(); const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore(); const { sources, setSources } = useImportStore();
// @ts-ignore const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources); sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]); const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
useEffect(() => { useEffect(() => {
setSources(successFiles); setSources(successFiles);
}, [successFiles]); }, [successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
for await (const selectFile of files) {
const { file, folderPath } = selectFile;
const { header, data } = await readCsvContent({ file });
const filterData: FileItemType['chunks'] = data
.filter((item) => item[0])
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
chunkIndex: 0
}));
const item: FileItemType = {
id: getNanoid(32),
file,
rawText: '',
chunks: filterData,
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg:
header[0] !== 'index' || header[1] !== 'content' || filterData.length === 0
? t('core.dataset.import.Csv format error')
: ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, 10);
return results;
});
}
}
},
errorToast: t('common.file.Select failed')
});
return ( return (
<Box> <Box>
<FileSelector <FileSelector
multiple
maxCount={maxSelectFileCount}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
isLoading={isLoading}
fileType={fileType} fileType={fileType}
onSelectFile={onSelectFile} selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/> />
<Box <Box
@ -122,43 +76,16 @@ const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () =
</Box> </Box>
{/* render files */} {/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}> <RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
{selectFiles.map((item) => (
<Flex
key={item.id}
alignItems={'center'}
px={4}
py={2}
borderRadius={'md'}
bg={'myGray.100'}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={() => {
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
))}
</Flex>
<Box textAlign={'right'}> <Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}> <Button
isDisabled={successFiles.length === 0 || uploading}
onClick={() => {
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
goToNext();
}}
>
{selectFiles.length > 0 {selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | ` ? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''} : ''}

View File

@ -6,22 +6,15 @@ import { useRouter } from 'next/router';
import { TabEnum } from '../../index'; import { TabEnum } from '../../index';
import { useMyStep } from '@fastgpt/web/hooks/useStep'; import { useMyStep } from '@fastgpt/web/hooks/useStep';
import dynamic from 'next/dynamic'; import dynamic from 'next/dynamic';
import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import Provider from './Provider';
const FileLocal = dynamic(() => import('./diffSource/FileLocal')); const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink')); const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText')); const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal')); const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
}
const ImportDataset = () => { const ImportDataset = () => {
const { t } = useTranslation(); const { t } = useTranslation();
const router = useRouter(); const router = useRouter();
@ -65,7 +58,7 @@ const ImportDataset = () => {
title: t('core.dataset.import.Upload data') title: t('core.dataset.import.Upload data')
} }
], ],
[ImportDataSourceEnum.tableLocal]: [ [ImportDataSourceEnum.csvTable]: [
{ {
title: t('core.dataset.import.Select file') title: t('core.dataset.import.Select file')
}, },
@ -88,7 +81,7 @@ const ImportDataset = () => {
if (source === ImportDataSourceEnum.fileLocal) return FileLocal; if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
if (source === ImportDataSourceEnum.fileLink) return FileLink; if (source === ImportDataSourceEnum.fileLink) return FileLink;
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText; if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (source === ImportDataSourceEnum.tableLocal) return TableLocal; if (source === ImportDataSourceEnum.csvTable) return TableLocal;
}, [source]); }, [source]);
return ImportComponent ? ( return ImportComponent ? (
@ -142,7 +135,7 @@ const ImportDataset = () => {
<MyStep /> <MyStep />
</Box> </Box>
</Box> </Box>
<Provider dataset={datasetDetail} parentId={parentId}> <Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}> <Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
<ImportComponent activeStep={activeStep} goToNext={goToNext} /> <ImportComponent activeStep={activeStep} goToNext={goToNext} />
</Box> </Box>

View File

@ -0,0 +1,7 @@
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type UploadFileItemType = ImportSourceItemType & {
file?: File;
isUploading: boolean;
uploadedFileRate: number;
};
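
For illustration only, a minimal TypeScript sketch (not part of this commit; names are assumed from the SelectFile components above) of how these upload fields can gate the "Next" step:

// Hypothetical helper: a step is ready only when every selected file
// finished uploading (rate 100) without an error message.
const isReadyForNextStep = (files: UploadFileItemType[]) =>
  files.length > 0 &&
  files.every((item) => !item.errorMsg && !item.isUploading && item.uploadedFileRate >= 100);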

View File

@ -1,19 +1,5 @@
import React, { useEffect, useMemo, useState } from 'react'; import React, { useEffect, useMemo, useState } from 'react';
import { import { Box, Textarea, Button, Flex, useTheme, useDisclosure } from '@chakra-ui/react';
Box,
Textarea,
Button,
Flex,
useTheme,
useDisclosure,
Table,
Thead,
Tbody,
Tr,
Th,
Td,
TableContainer
} from '@chakra-ui/react';
import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest'; import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
import { postSearchText } from '@/web/core/dataset/api'; import { postSearchText } from '@/web/core/dataset/api';
@ -36,10 +22,7 @@ import { useForm } from 'react-hook-form';
import MySelect from '@fastgpt/web/components/common/MySelect'; import MySelect from '@fastgpt/web/components/common/MySelect';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { fileDownload } from '@/web/common/file/utils'; import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
import { delay } from '@fastgpt/global/common/system/utils';
import QuoteItem from '@/components/core/dataset/QuoteItem'; import QuoteItem from '@/components/core/dataset/QuoteItem';
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import { useSystemStore } from '@/web/common/system/useSystemStore'; import { useSystemStore } from '@/web/common/system/useSystemStore';
import SearchParamsTip from '@/components/core/dataset/SearchParamsTip'; import SearchParamsTip from '@/components/core/dataset/SearchParamsTip';
@ -134,34 +117,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
}); });
} }
}); });
// const { mutate: onFileTest, isLoading: fileTestIsLoading } = useRequest({
// mutationFn: async ({ searchParams }: FormType) => {
// if (!selectFile) return Promise.reject('File is not selected');
// const { data } = await readCsvContent({ file: selectFile });
// const testList = data.slice(0, 100);
// const results: SearchTestResponse[] = [];
// for await (const item of testList) {
// try {
// const result = await postSearchText({ datasetId, text: item[0].trim(), ...searchParams });
// results.push(result);
// } catch (error) {
// await delay(500);
// }
// }
// return results;
// },
// onSuccess(res: SearchTestResponse[]) {
// console.log(res);
// },
// onError(err) {
// toast({
// title: getErrText(err),
// status: 'error'
// });
// }
// });
const onSelectFile = async (files: File[]) => { const onSelectFile = async (files: File[]) => {
const file = files[0]; const file = files[0];

View File

@ -101,7 +101,9 @@ const Standard = ({
{t('support.wallet.subscription.Sub plan')} {t('support.wallet.subscription.Sub plan')}
</Box> </Box>
<Box mt={8} mb={10} color={'myGray.500'} fontSize={'lg'}> <Box mt={8} mb={10} color={'myGray.500'} fontSize={'lg'}>
{t('support.wallet.subscription.Sub plan tip')} {t('support.wallet.subscription.Sub plan tip', {
title: feConfigs?.systemTitle
})}
</Box> </Box>
<Box> <Box>
<RowTabs <RowTabs

View File

@ -13,7 +13,7 @@ import { checkTeamAiPointsAndLock } from './utils';
import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils'; import { checkInvalidChunkAndLock } from '@fastgpt/service/core/dataset/training/utils';
import { addMinutes } from 'date-fns'; import { addMinutes } from 'date-fns';
import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken'; import { countGptMessagesTokens } from '@fastgpt/global/common/string/tiktoken';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller'; import { pushDataListToTrainingQueueByCollectionId } from '@fastgpt/service/core/dataset/training/controller';
const reduceQueue = () => { const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0; global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@ -128,7 +128,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
}); });
// get vector and insert // get vector and insert
const { insertLen } = await pushDataListToTrainingQueue({ const { insertLen } = await pushDataListToTrainingQueueByCollectionId({
teamId: data.teamId, teamId: data.teamId,
tmbId: data.tmbId, tmbId: data.tmbId,
collectionId: data.collectionId, collectionId: data.collectionId,

View File

@ -1,4 +1,4 @@
import { GET, POST, PUT, DELETE } from '@/web/common/api/request'; import { GET, POST } from '@/web/common/api/request';
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d'; import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
import { AxiosProgressEvent } from 'axios'; import { AxiosProgressEvent } from 'axios';
@ -8,10 +8,16 @@ export const postUploadFiles = (
data: FormData, data: FormData,
onUploadProgress: (progressEvent: AxiosProgressEvent) => void onUploadProgress: (progressEvent: AxiosProgressEvent) => void
) => ) =>
POST<string[]>('/common/file/upload', data, { POST<string>('/common/file/upload', data, {
timeout: 480000, timeout: 480000,
onUploadProgress, onUploadProgress,
headers: { headers: {
'Content-Type': 'multipart/form-data; charset=utf-8' 'Content-Type': 'multipart/form-data; charset=utf-8'
} }
}); });
export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
POST<{
previewContent: string;
totalLength: number;
}>('/common/file/previewContent', data);
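
As a hedged usage sketch (assumed, not from this commit), the new preview endpoint can be called with the fileId string returned by the single-file upload:

// Illustrative only: fetch a text preview of an uploaded file.
// `fileId` is assumed to be the string returned by postUploadFiles / uploadFile2DB.
const { previewContent, totalLength } = await getPreviewFileContent({
  fileId,
  csvFormat: false // set to true when previewing a CSV table file
});
console.log(`Loaded ${previewContent.length} of ${totalLength} characters`);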

View File

@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
/** /**
* upload file to mongo gridfs * upload file to mongo gridfs
*/ */
export const uploadFiles = ({ export const uploadFile2DB = ({
files, file,
bucketName, bucketName,
metadata = {}, metadata = {},
percentListen percentListen
}: { }: {
files: File[]; file: File;
bucketName: `${BucketNameEnum}`; bucketName: `${BucketNameEnum}`;
metadata?: Record<string, any>; metadata?: Record<string, any>;
percentListen?: (percent: number) => void; percentListen?: (percent: number) => void;
@ -21,9 +21,7 @@ export const uploadFiles = ({
const form = new FormData(); const form = new FormData();
form.append('metadata', JSON.stringify(metadata)); form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', bucketName); form.append('bucketName', bucketName);
files.forEach((file) => { form.append('file', file, encodeURIComponent(file.name));
form.append('file', file, encodeURIComponent(file.name));
});
return postUploadFiles(form, (e) => { return postUploadFiles(form, (e) => {
if (!e.total) return; if (!e.total) return;

View File

@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
accept={fileType} accept={fileType}
multiple={multiple} multiple={multiple}
onChange={(e) => { onChange={(e) => {
if (!e.target.files || e.target.files?.length === 0) return; const files = e.target.files;
if (e.target.files.length > maxCount) { if (!files || files?.length === 0) return;
return toast({
let fileList = Array.from(files);
if (fileList.length > maxCount) {
toast({
status: 'warning', status: 'warning',
title: t('common.file.Select file amount limit', { max: maxCount }) title: t('common.file.Select file amount limit', { max: maxCount })
}); });
fileList = fileList.slice(0, maxCount);
} }
onSelect(Array.from(e.target.files), openSign.current); onSelect(fileList, openSign.current);
}} }}
/> />
</Box> </Box>

View File

@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
let options = {}; let options = {};
if (MediaRecorder.isTypeSupported('audio/webm')) { if (MediaRecorder.isTypeSupported('audio/webm')) {
options = { type: 'audio/webm' }; options = { type: 'audio/webm' };
} else if (MediaRecorder.isTypeSupported('video/mp4')) { } else if (MediaRecorder.isTypeSupported('video/mp3')) {
options = { type: 'video/mp4' }; options = { type: 'video/mp3' };
} else { } else {
console.error('no suitable mimetype found for this device'); console.error('no suitable mimetype found for this device');
} }
const blob = new Blob(chunks, options); const blob = new Blob(chunks, options);
const duration = Math.round((Date.now() - startTimestamp.current) / 1000); const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
formData.append('file', blob, 'recording.mp4'); formData.append('file', blob, 'recording.mp3');
formData.append( formData.append(
'data', 'data',
JSON.stringify({ JSON.stringify({

View File

@ -8,13 +8,19 @@ import type {
} from '@/global/core/api/datasetReq.d'; } from '@/global/core/api/datasetReq.d';
import type { import type {
CreateDatasetCollectionParams, CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody, DatasetUpdateBody,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams, LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams PostWebsiteSyncParams,
TextCreateDatasetCollectionParams
} from '@fastgpt/global/core/dataset/api.d'; } from '@fastgpt/global/core/dataset/api.d';
import type { import type {
GetTrainingQueueProps, GetTrainingQueueProps,
GetTrainingQueueResponse, GetTrainingQueueResponse,
PostPreviewFilesChunksProps,
PostPreviewFilesChunksResponse,
PostPreviewTableChunksResponse,
SearchTestProps, SearchTestProps,
SearchTestResponse SearchTestResponse
} from '@/global/core/dataset/api.d'; } from '@/global/core/dataset/api.d';
@ -23,10 +29,7 @@ import type {
CreateDatasetParams, CreateDatasetParams,
InsertOneDatasetDataProps InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d'; } from '@/global/core/dataset/api.d';
import type { import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { import {
DatasetCollectionSyncResultEnum, DatasetCollectionSyncResultEnum,
@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id }); GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const postDatasetCollection = (data: CreateDatasetCollectionParams) => export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data); POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) => export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data); POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) => export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data); POST(`/core/dataset/collection/update`, data);
@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
export const getDatasetDataItemById = (id: string) => export const getDatasetDataItemById = (id: string) =>
GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id }); GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });
/**
* push data to training queue
*/
export const postChunks2Dataset = (data: PushDatasetDataProps) =>
POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
/** /**
* insert one data to dataset (immediately insert) * insert one data to dataset (immediately insert)
*/ */
@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
/* get length of system training queue */ /* get length of system training queue */
export const getTrainingQueueLen = (data: GetTrainingQueueProps) => export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data); GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);
/* ================== file ======================== */ /* ================== file ======================== */
export const getFileViewUrl = (fileId: string) => export const getFileViewUrl = (fileId: string) =>

View File

@ -1,200 +0,0 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useState } from 'react';
export type SelectFileItemType = {
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
multiple,
maxCount,
maxSize,
isLoading,
onSelectFile,
...props
}: {
fileType: string;
multiple?: boolean;
maxCount?: number;
maxSize?: number;
isLoading?: boolean;
onSelectFile: (e: SelectFileItemType[]) => any;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { File, onOpen } = useSelectFile({
fileType,
multiple,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxSize, onSelectFile, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
cursor={'pointer'}
_hover={{
bg: 'primary.50',
borderColor: 'primary.600'
}}
{...(isDragging
? {
borderColor: 'primary.600'
}
: {
borderColor: 'borderColor.high'
})}
{...props}
onDragEnter={handleDragEnter}
onDragOver={(e) => e.preventDefault()}
onDragLeave={handleDragLeave}
onDrop={handleDrop}
onClick={onOpen}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
folderPath: '',
file
}))
)
}
/>
</MyBox>
);
};
export default React.memo(FileSelector);

View File

@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api'; import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants'; import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form'; import { UseFormReturn } from 'react-hook-form';
export type ImportDataComponentProps = { export type ImportDataComponentProps = {
@ -10,19 +10,27 @@ export type ImportDataComponentProps = {
export type ImportSourceItemType = { export type ImportSourceItemType = {
id: string; id: string;
rawText: string;
chunks: PushDatasetDataChunkProps[]; createStatus: 'waiting' | 'creating' | 'finish';
chunkChars: number;
sourceFolderPath?: string;
sourceName: string;
sourceSize?: string;
icon: string;
metadata?: Record<string, any>; metadata?: Record<string, any>;
errorMsg?: string; errorMsg?: string;
// source // source
sourceName: string;
sourceSize?: string;
icon: string;
// file
isUploading?: boolean;
uploadedFileRate?: number;
dbFileId?: string; // File ID stored in the database; this ID is also used as the relateId in the image and collection metadata
file?: File; file?: File;
// link
link?: string; link?: string;
// custom text
rawText?: string;
}; };
export type ImportSourceParamsType = UseFormReturn< export type ImportSourceParamsType = UseFormReturn<

View File

@ -1,95 +1,5 @@
import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api'; import { getFileViewUrl } from '@/web/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { delay } from '@fastgpt/global/common/system/utils';
import { strIsLink } from '@fastgpt/global/common/string/tools'; import { strIsLink } from '@fastgpt/global/common/string/tools';
import type {
FileCreateDatasetCollectionParams,
PushDatasetDataChunkProps
} from '@fastgpt/global/core/dataset/api.d';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { POST } from '@/web/common/api/request';
/* upload a file to create collection */
export const fileCollectionCreate = ({
file,
metadata = {},
data,
percentListen
}: {
file: File;
metadata?: Record<string, any>;
data: FileCreateDatasetCollectionParams;
percentListen: (percent: number) => void;
}) => {
const form = new FormData();
form.append('data', JSON.stringify(data));
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', BucketNameEnum.dataset);
form.append('file', file, encodeURIComponent(file.name));
return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
timeout: 480000,
onUploadProgress: (e) => {
if (!e.total) return;
const percent = Math.round((e.loaded / e.total) * 100);
percentListen && percentListen(percent);
},
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
};
export async function chunksUpload({
billId,
collectionId,
trainingMode,
chunks,
prompt,
rate = 50,
onUploading
}: {
billId: string;
collectionId: string;
trainingMode: `${TrainingModeEnum}`;
chunks: PushDatasetDataChunkProps[];
prompt?: string;
rate?: number;
onUploading?: (rate: number) => void;
}) {
async function upload(data: PushDatasetDataChunkProps[]) {
return postChunks2Dataset({
collectionId,
trainingMode,
data,
prompt,
billId
});
}
let successInsert = 0;
let retryTimes = 10;
for (let i = 0; i < chunks.length; i += rate) {
try {
const uploadChunks = chunks.slice(i, i + rate);
const { insertLen } = await upload(uploadChunks);
if (onUploading) {
onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
}
successInsert += insertLen;
} catch (error) {
if (retryTimes === 0) {
return Promise.reject(error);
}
await delay(1000);
retryTimes--;
i -= rate;
}
}
return { insertLen: successInsert };
}
export async function getFileAndOpen(fileId: string) { export async function getFileAndOpen(fileId: string) {
if (strIsLink(fileId)) { if (strIsLink(fileId)) {

View File

@ -3,9 +3,9 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
# please download the model from https://huggingface.co/BAAI/bge-reranker-base and put it in the same directory as Dockerfile # please download the model from https://huggingface.co/BAAI/bge-reranker-base and put it in the same directory as Dockerfile
COPY ./bge-reranker-base ./bge-reranker-base COPY ./bge-reranker-base ./bge-reranker-base
COPY requirement.txt . COPY requirements.txt .
RUN python3 -m pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple RUN python3 -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
COPY app.py Dockerfile . COPY app.py Dockerfile .