Feat: Images dataset collection (#4941)
* New pic (#4858) * 更新数据集相关类型,添加图像文件ID和预览URL支持;优化数据集导入功能,新增图像数据集处理组件;修复部分国际化文本;更新文件上传逻辑以支持新功能。 * 与原先代码的差别 * 新增 V4.9.10 更新说明,支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,优化 LLM stream 调用超时,修复全文检索多知识库排序问题。同时更新数据集索引,移除 datasetId 字段以简化查询。 * 更换成fileId_image逻辑,并增加训练队列匹配的逻辑 * 新增图片集合判断逻辑,优化预览URL生成流程,确保仅在数据集为图片集合时生成预览URL,并添加相关日志输出以便调试。 * Refactor Docker Compose configuration to comment out exposed ports for production environments, update image versions for pgvector, fastgpt, and mcp_server, and enhance Redis service with a health check. Additionally, standardize dataset collection labels in constants and improve internationalization strings across multiple languages. * Enhance TrainingStates component by adding internationalization support for the imageParse training mode and update defaultCounts to include imageParse mode in trainingDetail API. * Enhance dataset import context by adding additional steps for image dataset import process and improve internationalization strings for modal buttons in the useEditTitle hook. * Update DatasetImportContext to conditionally render MyStep component based on data source type, improving the import process for non-image datasets. * Refactor image dataset handling by improving internationalization strings, enhancing error messages, and streamlining the preview URL generation process. * 图片上传到新建的 dataset_collection_images 表,逻辑跟随更改 * 修改了除了controller的其他部分问题 * 把图片数据集的逻辑整合到controller里面 * 补充i18n * 补充i18n * resolve评论:主要是上传逻辑的更改和组件复用 * 图片名称的图标显示 * 修改编译报错的命名问题 * 删除不需要的collectionid部分 * 多余文件的处理和改动一个删除按钮 * 除了loading和统一的imageId,其他都resolve掉的 * 处理图标报错 * 复用了MyPhotoView并采用全部替换的方式将imageFileId变成imageId * 去除不必要文件修改 * 报错和字段修改 * 增加上传成功后删除临时文件的逻辑以及回退一些修改 * 删除path字段,将图片保存到gridfs内,并修改增删等操作的代码 * 修正编译错误 --------- Co-authored-by: archer <545436317@qq.com> * perf: image dataset * feat: insert image * perf: image icon * fix: training state --------- Co-authored-by: Zhuangzai fa <143257420+ctrlz526@users.noreply.github.com>
This commit is contained in:
parent
9fb5d05865
commit
92c38d9d2f
@ -10,8 +10,9 @@ weight: 789
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
1. 工作流中增加节点搜索功能。
|
||||
2. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
|
||||
1. 商业版支持图片知识库。
|
||||
2. 工作流中增加节点搜索功能。
|
||||
3. 工作流中,子流程版本控制,可选择“保持最新版本”,无需手动更新。
|
||||
|
||||
## ⚙️ 优化
|
||||
|
||||
|
||||
@ -6,7 +6,8 @@ export const fileImgs = [
|
||||
{ suffix: '(doc|docs)', src: 'file/fill/doc' },
|
||||
{ suffix: 'txt', src: 'file/fill/txt' },
|
||||
{ suffix: 'md', src: 'file/fill/markdown' },
|
||||
{ suffix: 'html', src: 'file/fill/html' }
|
||||
{ suffix: 'html', src: 'file/fill/html' },
|
||||
{ suffix: '(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|tif)', src: 'image' }
|
||||
|
||||
// { suffix: '.', src: '/imgs/files/file.svg' }
|
||||
];
|
||||
|
||||
@ -2,4 +2,5 @@ export type AuthFrequencyLimitProps = {
|
||||
eventId: string;
|
||||
maxAmount: number;
|
||||
expiredTime: Date;
|
||||
num?: number;
|
||||
};
|
||||
|
||||
@ -34,7 +34,7 @@ export const valToStr = (val: any) => {
|
||||
};
|
||||
|
||||
// Replace each {{variable}} placeholder with its value
|
||||
export function replaceVariable(text: any, obj: Record<string, string | number>) {
|
||||
export function replaceVariable(text: any, obj: Record<string, string | number | undefined>) {
|
||||
if (typeof text !== 'string') return text;
|
||||
|
||||
for (const key in obj) {
|
||||
|
||||
18
packages/global/core/dataset/api.d.ts
vendored
18
packages/global/core/dataset/api.d.ts
vendored
@ -1,4 +1,9 @@
|
||||
import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
|
||||
import type {
|
||||
ChunkSettingsType,
|
||||
DatasetDataIndexItemType,
|
||||
DatasetDataFieldType,
|
||||
DatasetSchemaType
|
||||
} from './type';
|
||||
import type {
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
@ -7,8 +12,7 @@ import type {
|
||||
ChunkTriggerConfigTypeEnum,
|
||||
ParagraphChunkAIModeEnum
|
||||
} from './constants';
|
||||
import type { LLMModelItemType } from '../ai/model.d';
|
||||
import type { ParentIdType } from 'common/parentFolder/type';
|
||||
import type { ParentIdType } from '../../common/parentFolder/type';
|
||||
|
||||
/* ================= dataset ===================== */
|
||||
export type DatasetUpdateBody = {
|
||||
@ -100,6 +104,9 @@ export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollecti
|
||||
externalFileUrl: string;
|
||||
filename?: string;
|
||||
};
|
||||
export type ImageCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
collectionName: string;
|
||||
};
|
||||
|
||||
/* ================= tag ===================== */
|
||||
export type CreateDatasetCollectionTagParams = {
|
||||
@ -125,8 +132,9 @@ export type PgSearchRawType = {
|
||||
score: number;
|
||||
};
|
||||
export type PushDatasetDataChunkProps = {
|
||||
q: string; // embedding content
|
||||
a?: string; // bonus content
|
||||
q?: string;
|
||||
a?: string;
|
||||
imageId?: string;
|
||||
chunkIndex?: number;
|
||||
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
|
||||
};
|
||||
|
||||
@ -77,7 +77,8 @@ export enum DatasetCollectionTypeEnum {
|
||||
file = 'file',
|
||||
link = 'link', // one link
|
||||
externalFile = 'externalFile',
|
||||
apiFile = 'apiFile'
|
||||
apiFile = 'apiFile',
|
||||
images = 'images'
|
||||
}
|
||||
export const DatasetCollectionTypeMap = {
|
||||
[DatasetCollectionTypeEnum.folder]: {
|
||||
@ -97,6 +98,9 @@ export const DatasetCollectionTypeMap = {
|
||||
},
|
||||
[DatasetCollectionTypeEnum.apiFile]: {
|
||||
name: i18nT('common:core.dataset.apiFile')
|
||||
},
|
||||
[DatasetCollectionTypeEnum.images]: {
|
||||
name: i18nT('dataset:core.dataset.Image collection')
|
||||
}
|
||||
};
|
||||
|
||||
@ -120,6 +124,7 @@ export const DatasetCollectionSyncResultMap = {
|
||||
export enum DatasetCollectionDataProcessModeEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa',
|
||||
imageParse = 'imageParse',
|
||||
backup = 'backup',
|
||||
|
||||
auto = 'auto' // abandon
|
||||
@ -133,6 +138,10 @@ export const DatasetCollectionDataProcessModeMap = {
|
||||
label: i18nT('common:core.dataset.training.QA mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.QA Import Tip')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.imageParse]: {
|
||||
label: i18nT('dataset:training.Image mode'),
|
||||
tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
|
||||
},
|
||||
[DatasetCollectionDataProcessModeEnum.backup]: {
|
||||
label: i18nT('dataset:backup_mode'),
|
||||
tooltip: i18nT('dataset:backup_mode')
|
||||
@ -172,14 +181,16 @@ export enum ImportDataSourceEnum {
|
||||
fileCustom = 'fileCustom',
|
||||
externalFile = 'externalFile',
|
||||
apiDataset = 'apiDataset',
|
||||
reTraining = 'reTraining'
|
||||
reTraining = 'reTraining',
|
||||
imageDataset = 'imageDataset'
|
||||
}
|
||||
|
||||
export enum TrainingModeEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa',
|
||||
auto = 'auto',
|
||||
image = 'image'
|
||||
image = 'image',
|
||||
imageParse = 'imageParse'
|
||||
}
|
||||
|
||||
/* ------------ search -------------- */
|
||||
|
||||
4
packages/global/core/dataset/controller.d.ts
vendored
4
packages/global/core/dataset/controller.d.ts
vendored
@ -8,17 +8,19 @@ export type CreateDatasetDataProps = {
|
||||
chunkIndex?: number;
|
||||
q: string;
|
||||
a?: string;
|
||||
imageId?: string;
|
||||
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
|
||||
};
|
||||
|
||||
export type UpdateDatasetDataProps = {
|
||||
dataId: string;
|
||||
|
||||
q?: string;
|
||||
q: string;
|
||||
a?: string;
|
||||
indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
|
||||
dataId?: string; // pg data id
|
||||
})[];
|
||||
imageId?: string;
|
||||
};
|
||||
|
||||
export type PatchIndexesProps =
|
||||
|
||||
13
packages/global/core/dataset/image/type.d.ts
vendored
Normal file
13
packages/global/core/dataset/image/type.d.ts
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
// Record shape for one image belonging to a dataset (file metadata plus ownership ids).
export type DatasetImageSchema = {
|
||||
_id: string;
|
||||
teamId: string;
|
||||
|
||||
datasetId: string;
|
||||
collectionId?: string; // optional; presumably filled in once the image is attached to a collection — TODO confirm
|
||||
name: string;
|
||||
|
||||
contentType: string;
|
||||
|
||||
size: number; // NOTE(review): unit not shown here, presumably bytes — confirm
|
||||
metadata?: Record<string, any>;
|
||||
|
||||
expiredTime?: Date; // optional; looks like a TTL marker that is cleared when the image is kept — confirm against the image utils
|
||||
createdAt: Date;
|
||||
|
||||
updatedAt: Date;
|
||||
|
||||
};
|
||||
26
packages/global/core/dataset/type.d.ts
vendored
26
packages/global/core/dataset/type.d.ts
vendored
@ -16,6 +16,7 @@ import type { DatasetPermission } from '../../support/permission/dataset/control
|
||||
import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
|
||||
import type { SourceMemberType } from 'support/user/type';
|
||||
import type { DatasetDataIndexTypeEnum } from './data/constants';
|
||||
import type { ParentIdType } from 'common/parentFolder/type';
|
||||
|
||||
export type ChunkSettingsType = {
|
||||
trainingType?: DatasetCollectionDataProcessModeEnum;
|
||||
@ -49,7 +50,7 @@ export type ChunkSettingsType = {
|
||||
|
||||
export type DatasetSchemaType = {
|
||||
_id: string;
|
||||
parentId?: string;
|
||||
parentId: ParentIdType;
|
||||
userId: string;
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
@ -132,7 +133,13 @@ export type DatasetDataIndexItemType = {
|
||||
dataId: string; // pg data id
|
||||
text: string;
|
||||
};
|
||||
export type DatasetDataSchemaType = {
|
||||
|
||||
export type DatasetDataFieldType = {
|
||||
q: string; // large chunks or question
|
||||
a?: string; // answer or custom content
|
||||
imageId?: string;
|
||||
};
|
||||
export type DatasetDataSchemaType = DatasetDataFieldType & {
|
||||
_id: string;
|
||||
userId: string;
|
||||
teamId: string;
|
||||
@ -141,13 +148,9 @@ export type DatasetDataSchemaType = {
|
||||
collectionId: string;
|
||||
chunkIndex: number;
|
||||
updateTime: Date;
|
||||
q: string; // large chunks or question
|
||||
a: string; // answer or custom content
|
||||
history?: {
|
||||
q: string;
|
||||
a: string;
|
||||
history?: (DatasetDataFieldType & {
|
||||
updateTime: Date;
|
||||
}[];
|
||||
})[];
|
||||
forbid?: boolean;
|
||||
fullTextToken: string;
|
||||
indexes: DatasetDataIndexItemType[];
|
||||
@ -179,6 +182,7 @@ export type DatasetTrainingSchemaType = {
|
||||
dataId?: string;
|
||||
q: string;
|
||||
a: string;
|
||||
imageId?: string;
|
||||
chunkIndex: number;
|
||||
indexSize?: number;
|
||||
weight: number;
|
||||
@ -244,20 +248,18 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
|
||||
};
|
||||
|
||||
/* ================= data ===================== */
|
||||
export type DatasetDataItemType = {
|
||||
export type DatasetDataItemType = DatasetDataFieldType & {
|
||||
id: string;
|
||||
teamId: string;
|
||||
datasetId: string;
|
||||
imagePreivewUrl?: string;
|
||||
updateTime: Date;
|
||||
collectionId: string;
|
||||
sourceName: string;
|
||||
sourceId?: string;
|
||||
q: string;
|
||||
a: string;
|
||||
chunkIndex: number;
|
||||
indexes: DatasetDataIndexItemType[];
|
||||
isOwner: boolean;
|
||||
// permission: DatasetPermission;
|
||||
};
|
||||
|
||||
/* --------------- file ---------------------- */
|
||||
|
||||
@ -2,10 +2,15 @@ import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
|
||||
import { getFileIcon } from '../../common/file/icon';
|
||||
import { strIsLink } from '../../common/string/tools';
|
||||
|
||||
export function getCollectionIcon(
|
||||
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
|
||||
name = ''
|
||||
) {
|
||||
export function getCollectionIcon({
|
||||
type = DatasetCollectionTypeEnum.file,
|
||||
name = '',
|
||||
sourceId
|
||||
}: {
|
||||
type?: DatasetCollectionTypeEnum;
|
||||
name?: string;
|
||||
sourceId?: string;
|
||||
}) {
|
||||
if (type === DatasetCollectionTypeEnum.folder) {
|
||||
return 'common/folderFill';
|
||||
}
|
||||
@ -15,7 +20,10 @@ export function getCollectionIcon(
|
||||
if (type === DatasetCollectionTypeEnum.virtual) {
|
||||
return 'file/fill/manual';
|
||||
}
|
||||
return getFileIcon(name);
|
||||
if (type === DatasetCollectionTypeEnum.images) {
|
||||
return 'core/dataset/imageFill';
|
||||
}
|
||||
return getSourceNameIcon({ sourceName: name, sourceId });
|
||||
}
|
||||
export function getSourceNameIcon({
|
||||
sourceName,
|
||||
|
||||
@ -142,23 +142,26 @@ export const updateRawTextBufferExpiredTime = async ({
|
||||
};
|
||||
|
||||
export const clearExpiredRawTextBufferCron = async () => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
const clearExpiredRawTextBuffer = async () => {
|
||||
addLog.debug('Clear expired raw text buffer start');
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const data = await MongoRawTextBufferSchema.find(
|
||||
{
|
||||
'metadata.expiredTime': { $lt: new Date() }
|
||||
},
|
||||
'_id'
|
||||
).lean();
|
||||
const data = await MongoRawTextBufferSchema.find(
|
||||
{
|
||||
'metadata.expiredTime': { $lt: new Date() }
|
||||
},
|
||||
'_id'
|
||||
).lean();
|
||||
|
||||
for (const item of data) {
|
||||
for (const item of data) {
|
||||
try {
|
||||
await gridBucket.delete(item._id);
|
||||
} catch (error) {
|
||||
addLog.error('Delete expired raw text buffer error', error);
|
||||
}
|
||||
addLog.debug('Clear expired raw text buffer end');
|
||||
});
|
||||
}
|
||||
addLog.debug('Clear expired raw text buffer end');
|
||||
};
|
||||
|
||||
setCron('*/10 * * * *', async () => {
|
||||
|
||||
@ -7,12 +7,13 @@ import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
|
||||
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { readRawContentByFileBuffer } from '../read/utils';
|
||||
import { gridFsStream2Buffer, stream2Encoding } from './utils';
|
||||
import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
|
||||
import { addLog } from '../../system/log';
|
||||
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
|
||||
import { Readable } from 'stream';
|
||||
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
|
||||
MongoDatasetFileSchema;
|
||||
@ -64,23 +65,7 @@ export async function uploadFile({
|
||||
// create a gridfs bucket
|
||||
const bucket = getGridBucket(bucketName);
|
||||
|
||||
const fileSize = stats.size;
|
||||
// 单块大小:尽可能大,但不超过 14MB,不小于512KB
|
||||
const chunkSizeBytes = (() => {
|
||||
// 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB
|
||||
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
|
||||
|
||||
// 确保块大小至少为512KB
|
||||
const minChunkSize = 512 * 1024; // 512KB
|
||||
|
||||
// 取理想块大小和最小块大小中的较大值
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// 将块大小向上取整到最接近的64KB的倍数,使其更整齐
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
})();
|
||||
const chunkSizeBytes = computeGridFsChunSize(stats.size);
|
||||
|
||||
const stream = bucket.openUploadStream(filename, {
|
||||
metadata,
|
||||
@ -173,24 +158,18 @@ export async function getFileById({
|
||||
|
||||
export async function delFileByFileIdList({
|
||||
bucketName,
|
||||
fileIdList,
|
||||
retry = 3
|
||||
fileIdList
|
||||
}: {
|
||||
bucketName: `${BucketNameEnum}`;
|
||||
fileIdList: string[];
|
||||
retry?: number;
|
||||
}): Promise<any> {
|
||||
try {
|
||||
return retryFn(async () => {
|
||||
const bucket = getGridBucket(bucketName);
|
||||
|
||||
for await (const fileId of fileIdList) {
|
||||
await bucket.delete(new Types.ObjectId(fileId));
|
||||
}
|
||||
} catch (error) {
|
||||
if (retry > 0) {
|
||||
return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 });
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export async function getDownloadStream({
|
||||
|
||||
@ -105,3 +105,20 @@ export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
|
||||
stream: copyStream
|
||||
};
|
||||
};
|
||||
|
||||
// Compute the GridFS chunk size for a file: as large as possible, but no more than 14MB and no less than 512KB.
// `fileSize` is presumably in bytes (the constants below are byte-denominated); the result is a multiple of 64KB.
|
||||
export const computeGridFsChunSize = (fileSize: number) => {
|
||||
// Ideal chunk size: file size ÷ target chunk count (10), capped at 14MB per chunk
|
||||
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
|
||||
|
||||
// Chunk size must be at least 512KB
|
||||
const minChunkSize = 512 * 1024; // 512KB
|
||||
|
||||
// Take the larger of the ideal chunk size and the minimum chunk size
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// Round the chunk size up to the nearest multiple of 64KB to keep it tidy
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
};
|
||||
|
||||
@ -22,7 +22,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
|
||||
maxSize *= 1024 * 1024;
|
||||
|
||||
class UploadModel {
|
||||
uploader = multer({
|
||||
uploaderSingle = multer({
|
||||
limits: {
|
||||
fieldSize: maxSize
|
||||
},
|
||||
@ -41,8 +41,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
|
||||
}
|
||||
})
|
||||
}).single('file');
|
||||
|
||||
async doUpload<T = any>(
|
||||
async getUploadFile<T = any>(
|
||||
req: NextApiRequest,
|
||||
res: NextApiResponse,
|
||||
originBucketName?: `${BucketNameEnum}`
|
||||
@ -54,7 +53,7 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
|
||||
bucketName?: `${BucketNameEnum}`;
|
||||
}>((resolve, reject) => {
|
||||
// @ts-ignore
|
||||
this.uploader(req, res, (error) => {
|
||||
this.uploaderSingle(req, res, (error) => {
|
||||
if (error) {
|
||||
return reject(error);
|
||||
}
|
||||
@ -94,6 +93,58 @@ export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
uploaderMultiple = multer({
|
||||
limits: {
|
||||
fieldSize: maxSize
|
||||
},
|
||||
preservePath: true,
|
||||
storage: multer.diskStorage({
|
||||
// destination: (_req, _file, cb) => {
|
||||
// cb(null, tmpFileDirPath);
|
||||
// },
|
||||
filename: (req, file, cb) => {
|
||||
if (!file?.originalname) {
|
||||
cb(new Error('File not found'), '');
|
||||
} else {
|
||||
const { ext } = path.parse(decodeURIComponent(file.originalname));
|
||||
cb(null, `${getNanoid()}${ext}`);
|
||||
}
|
||||
}
|
||||
})
|
||||
}).array('file', global.feConfigs?.uploadFileMaxSize);
|
||||
async getUploadFiles<T = any>(req: NextApiRequest, res: NextApiResponse) {
|
||||
return new Promise<{
|
||||
files: FileType[];
|
||||
data: T;
|
||||
}>((resolve, reject) => {
|
||||
// @ts-ignore
|
||||
this.uploaderMultiple(req, res, (error) => {
|
||||
if (error) {
|
||||
console.log(error);
|
||||
return reject(error);
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
const files = req.files as FileType[];
|
||||
|
||||
resolve({
|
||||
files: files.map((file) => ({
|
||||
...file,
|
||||
originalname: decodeURIComponent(file.originalname)
|
||||
})),
|
||||
data: (() => {
|
||||
if (!req.body?.data) return {};
|
||||
try {
|
||||
return JSON.parse(req.body.data);
|
||||
} catch (error) {
|
||||
return {};
|
||||
}
|
||||
})()
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return new UploadModel();
|
||||
|
||||
@ -4,7 +4,8 @@ import { MongoFrequencyLimit } from './schema';
|
||||
export const authFrequencyLimit = async ({
|
||||
eventId,
|
||||
maxAmount,
|
||||
expiredTime
|
||||
expiredTime,
|
||||
num = 1
|
||||
}: AuthFrequencyLimitProps) => {
|
||||
try {
|
||||
// Increase the amount recorded for this eventId; if no record exists yet, create one
|
||||
@ -14,7 +15,7 @@ export const authFrequencyLimit = async ({
|
||||
expiredTime: { $gte: new Date() }
|
||||
},
|
||||
{
|
||||
$inc: { amount: 1 },
|
||||
$inc: { amount: num },
|
||||
// If not exist, set the expiredTime
|
||||
$setOnInsert: { expiredTime }
|
||||
},
|
||||
|
||||
@ -6,7 +6,9 @@ export enum TimerIdEnum {
|
||||
updateStandardPlan = 'updateStandardPlan',
|
||||
scheduleTriggerApp = 'scheduleTriggerApp',
|
||||
notification = 'notification',
|
||||
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer'
|
||||
|
||||
clearExpiredRawTextBuffer = 'clearExpiredRawTextBuffer',
|
||||
clearExpiredDatasetImage = 'clearExpiredDatasetImage'
|
||||
}
|
||||
|
||||
export enum LockNotificationEnum {
|
||||
|
||||
@ -20,6 +20,10 @@ export const getVlmModel = (model?: string) => {
|
||||
?.find((item) => item.model === model || item.name === model);
|
||||
};
|
||||
|
||||
// List every model in global.llmModelMap whose `vision` flag is truthy; falls back to an empty array.
export const getVlmModelList = () => {
|
||||
return Array.from(global.llmModelMap.values())?.filter((item) => item.vision) || [];
|
||||
};
|
||||
|
||||
export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!;
|
||||
export const getEmbeddingModel = (model?: string) => {
|
||||
if (!model) return getDefaultEmbeddingModel();
|
||||
|
||||
@ -5,9 +5,10 @@ import {
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import {
|
||||
type DatasetCollectionSchemaType,
|
||||
type DatasetSchemaType
|
||||
import type {
|
||||
DatasetCollectionSchemaType,
|
||||
DatasetDataFieldType,
|
||||
DatasetSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { MongoDatasetData } from '../data/schema';
|
||||
@ -15,7 +16,7 @@ import { delImgByRelatedId } from '../../../common/file/image/controller';
|
||||
import { deleteDatasetDataVector } from '../../../common/vectorDB/controller';
|
||||
import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { type ClientSession } from '../../../common/mongo';
|
||||
import type { ClientSession } from '../../../common/mongo';
|
||||
import { createOrGetCollectionTags } from './utils';
|
||||
import { rawText2Chunks } from '../read';
|
||||
import { checkDatasetLimit } from '../../../support/permission/teamLimit';
|
||||
@ -38,20 +39,25 @@ import {
|
||||
getLLMMaxChunkSize
|
||||
} from '@fastgpt/global/core/dataset/training/utils';
|
||||
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
|
||||
import { deleteDatasetImage } from '../image/controller';
|
||||
import { clearCollectionImages, removeDatasetImageExpiredTime } from '../image/utils';
|
||||
|
||||
export const createCollectionAndInsertData = async ({
|
||||
dataset,
|
||||
rawText,
|
||||
relatedId,
|
||||
imageIds,
|
||||
createCollectionParams,
|
||||
backupParse = false,
|
||||
billId,
|
||||
session
|
||||
}: {
|
||||
dataset: DatasetSchemaType;
|
||||
rawText: string;
|
||||
rawText?: string;
|
||||
relatedId?: string;
|
||||
imageIds?: string[];
|
||||
createCollectionParams: CreateOneCollectionParams;
|
||||
|
||||
backupParse?: boolean;
|
||||
|
||||
billId?: string;
|
||||
@ -69,13 +75,13 @@ export const createCollectionAndInsertData = async ({
|
||||
// Set default params
|
||||
const trainingType =
|
||||
createCollectionParams.trainingType || DatasetCollectionDataProcessModeEnum.chunk;
|
||||
const chunkSize = computeChunkSize({
|
||||
...createCollectionParams,
|
||||
trainingType,
|
||||
llmModel: getLLMModel(dataset.agentModel)
|
||||
});
|
||||
const chunkSplitter = computeChunkSplitter(createCollectionParams);
|
||||
const paragraphChunkDeep = computeParagraphChunkDeep(createCollectionParams);
|
||||
const trainingMode = getTrainingModeByCollection({
|
||||
trainingType: trainingType,
|
||||
autoIndexes: createCollectionParams.autoIndexes,
|
||||
imageIndex: createCollectionParams.imageIndex
|
||||
});
|
||||
|
||||
if (
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.qa ||
|
||||
@ -90,35 +96,60 @@ export const createCollectionAndInsertData = async ({
|
||||
delete createCollectionParams.qaPrompt;
|
||||
}
|
||||
|
||||
// 1. split chunks
|
||||
const chunks = rawText2Chunks({
|
||||
rawText,
|
||||
chunkTriggerType: createCollectionParams.chunkTriggerType,
|
||||
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
|
||||
chunkSize,
|
||||
paragraphChunkDeep,
|
||||
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
|
||||
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
|
||||
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : [],
|
||||
backupParse
|
||||
});
|
||||
// 1. split chunks or create image chunks
|
||||
const {
|
||||
chunks,
|
||||
chunkSize
|
||||
}: {
|
||||
chunks: Array<{
|
||||
q?: string;
|
||||
a?: string; // answer or custom content
|
||||
imageId?: string;
|
||||
indexes?: string[];
|
||||
}>;
|
||||
chunkSize?: number;
|
||||
} = (() => {
|
||||
if (rawText) {
|
||||
const chunkSize = computeChunkSize({
|
||||
...createCollectionParams,
|
||||
trainingType,
|
||||
llmModel: getLLMModel(dataset.agentModel)
|
||||
});
|
||||
// Process text chunks
|
||||
const chunks = rawText2Chunks({
|
||||
rawText,
|
||||
chunkTriggerType: createCollectionParams.chunkTriggerType,
|
||||
chunkTriggerMinSize: createCollectionParams.chunkTriggerMinSize,
|
||||
chunkSize,
|
||||
paragraphChunkDeep,
|
||||
paragraphChunkMinSize: createCollectionParams.paragraphChunkMinSize,
|
||||
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
|
||||
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : [],
|
||||
backupParse
|
||||
});
|
||||
return { chunks, chunkSize };
|
||||
}
|
||||
|
||||
if (imageIds) {
|
||||
// Process image chunks
|
||||
const chunks = imageIds.map((imageId: string) => ({
|
||||
imageId,
|
||||
indexes: []
|
||||
}));
|
||||
return { chunks };
|
||||
}
|
||||
throw new Error('Either rawText or imageIdList must be provided');
|
||||
})();
|
||||
|
||||
// 2. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(
|
||||
getTrainingModeByCollection({
|
||||
trainingType: trainingType,
|
||||
autoIndexes: createCollectionParams.autoIndexes,
|
||||
imageIndex: createCollectionParams.imageIndex
|
||||
}),
|
||||
chunks
|
||||
)
|
||||
insertLen: predictDataLimitLength(trainingMode, chunks)
|
||||
});
|
||||
|
||||
const fn = async (session: ClientSession) => {
|
||||
// 3. create collection
|
||||
// 3. Create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...createCollectionParams,
|
||||
trainingType,
|
||||
@ -126,8 +157,8 @@ export const createCollectionAndInsertData = async ({
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
|
||||
hashRawText: hashStr(rawText),
|
||||
rawTextLength: rawText.length,
|
||||
hashRawText: rawText ? hashStr(rawText) : undefined,
|
||||
rawTextLength: rawText?.length,
|
||||
nextSyncTime: (() => {
|
||||
// ignore auto collections sync for website datasets
|
||||
if (!dataset.autoSync && dataset.type === DatasetTypeEnum.websiteDataset) return undefined;
|
||||
@ -169,11 +200,7 @@ export const createCollectionAndInsertData = async ({
|
||||
vectorModel: dataset.vectorModel,
|
||||
vlmModel: dataset.vlmModel,
|
||||
indexSize: createCollectionParams.indexSize,
|
||||
mode: getTrainingModeByCollection({
|
||||
trainingType: trainingType,
|
||||
autoIndexes: createCollectionParams.autoIndexes,
|
||||
imageIndex: createCollectionParams.imageIndex
|
||||
}),
|
||||
mode: trainingMode,
|
||||
prompt: createCollectionParams.qaPrompt,
|
||||
billId: traingBillId,
|
||||
data: chunks.map((item, index) => ({
|
||||
@ -187,7 +214,12 @@ export const createCollectionAndInsertData = async ({
|
||||
session
|
||||
});
|
||||
|
||||
// 6. remove related image ttl
|
||||
// 6. Remove images ttl index
|
||||
await removeDatasetImageExpiredTime({
|
||||
ids: imageIds,
|
||||
collectionId,
|
||||
session
|
||||
});
|
||||
if (relatedId) {
|
||||
await MongoImage.updateMany(
|
||||
{
|
||||
@ -207,7 +239,7 @@ export const createCollectionAndInsertData = async ({
|
||||
}
|
||||
|
||||
return {
|
||||
collectionId,
|
||||
collectionId: String(collectionId),
|
||||
insertResults
|
||||
};
|
||||
};
|
||||
@ -288,17 +320,20 @@ export const delCollectionRelatedSource = async ({
|
||||
.map((item) => item?.metadata?.relatedImgId || '')
|
||||
.filter(Boolean);
|
||||
|
||||
// Delete files
|
||||
await delFileByFileIdList({
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileIdList
|
||||
});
|
||||
// Delete images
|
||||
await delImgByRelatedId({
|
||||
teamId,
|
||||
relateIds: relatedImageIds,
|
||||
session
|
||||
});
|
||||
// Delete files and images in parallel
|
||||
await Promise.all([
|
||||
// Delete files
|
||||
delFileByFileIdList({
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileIdList
|
||||
}),
|
||||
// Delete images
|
||||
delImgByRelatedId({
|
||||
teamId,
|
||||
relateIds: relatedImageIds,
|
||||
session
|
||||
})
|
||||
]);
|
||||
};
|
||||
/**
|
||||
* delete collection and it related data
|
||||
@ -343,16 +378,16 @@ export async function delCollection({
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIds }
|
||||
}),
|
||||
// Delete dataset_images
|
||||
clearCollectionImages(collectionIds),
|
||||
// Delete images if needed
|
||||
...(delImg
|
||||
? [
|
||||
delImgByRelatedId({
|
||||
teamId,
|
||||
relateIds: collections
|
||||
.map((item) => item?.metadata?.relatedImgId || '')
|
||||
.filter(Boolean)
|
||||
})
|
||||
]
|
||||
? collections
|
||||
.map((item) => item?.metadata?.relatedImgId || '')
|
||||
.filter(Boolean)
|
||||
.map((imageId) => deleteDatasetImage(imageId))
|
||||
: []),
|
||||
// Delete files if needed
|
||||
...(delFile
|
||||
? [
|
||||
delFileByFileIdList({
|
||||
|
||||
@ -1,11 +1,9 @@
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import { type ClientSession } from '../../../common/mongo';
|
||||
import type { ClientSession } from '../../../common/mongo';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { readFromSecondary } from '../../../common/mongo/utils';
|
||||
import {
|
||||
type CollectionWithDatasetType,
|
||||
type DatasetCollectionSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionSyncResultEnum,
|
||||
@ -233,18 +231,37 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
|
||||
QA: 独立进程
|
||||
Chunk: Image Index -> Auto index -> chunk index
|
||||
*/
|
||||
export const getTrainingModeByCollection = (collection: {
|
||||
trainingType: DatasetCollectionSchemaType['trainingType'];
|
||||
autoIndexes?: DatasetCollectionSchemaType['autoIndexes'];
|
||||
imageIndex?: DatasetCollectionSchemaType['imageIndex'];
|
||||
export const getTrainingModeByCollection = ({
|
||||
trainingType,
|
||||
autoIndexes,
|
||||
imageIndex
|
||||
}: {
|
||||
trainingType: DatasetCollectionDataProcessModeEnum;
|
||||
autoIndexes?: boolean;
|
||||
imageIndex?: boolean;
|
||||
}) => {
|
||||
if (collection.trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
if (
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.imageParse &&
|
||||
global.feConfigs?.isPlus
|
||||
) {
|
||||
return TrainingModeEnum.imageParse;
|
||||
}
|
||||
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
return TrainingModeEnum.qa;
|
||||
}
|
||||
if (collection.imageIndex && global.feConfigs?.isPlus) {
|
||||
if (
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
|
||||
imageIndex &&
|
||||
global.feConfigs?.isPlus
|
||||
) {
|
||||
return TrainingModeEnum.image;
|
||||
}
|
||||
if (collection.autoIndexes && global.feConfigs?.isPlus) {
|
||||
if (
|
||||
trainingType === DatasetCollectionDataProcessModeEnum.chunk &&
|
||||
autoIndexes &&
|
||||
global.feConfigs?.isPlus
|
||||
) {
|
||||
return TrainingModeEnum.auto;
|
||||
}
|
||||
return TrainingModeEnum.chunk;
|
||||
|
||||
@ -9,6 +9,7 @@ import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
|
||||
import { MongoDatasetDataText } from './data/dataTextSchema';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
import { clearDatasetImages } from './image/utils';
|
||||
|
||||
/* ============= dataset ========== */
|
||||
/* find all datasetId by top datasetId */
|
||||
@ -102,8 +103,10 @@ export async function delDatasetRelevantData({
|
||||
}),
|
||||
//delete dataset_datas
|
||||
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
|
||||
// Delete Image and file
|
||||
// Delete collection image and file
|
||||
delCollectionRelatedSource({ collections }),
|
||||
// Delete dataset Image
|
||||
clearDatasetImages(datasetIds),
|
||||
// Delete vector data
|
||||
deleteDatasetDataVector({ teamId, datasetIds })
|
||||
]);
|
||||
|
||||
57
packages/service/core/dataset/data/controller.ts
Normal file
57
packages/service/core/dataset/data/controller.ts
Normal file
@ -0,0 +1,57 @@
|
||||
import { getDatasetImagePreviewUrl } from '../image/utils';
|
||||
import type { QuoteDataItemType } from '../../../../../projects/app/src/service/core/chat/constants';
|
||||
import type { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
/**
 * Build the display value (`q` / `a` / optional image preview) of one dataset data item.
 *
 * - Without an `imageId` the text fields are passed through untouched.
 * - With an `imageId` a preview URL valid for 7 days is generated via
 *   `getDatasetImagePreviewUrl` and returned as `imagePreivewUrl` (the misspelled
 *   key is part of the public shape); `q` is returned as an empty string.
 *
 * NOTE(review): `q` is blanked for image items, so the original text is dropped
 * from the result — confirm this is intended.
 */
export const formatDatasetDataValue = ({
  q,
  a,
  imageId,
  teamId,
  datasetId
}: {
  q: string;
  a?: string;
  imageId?: string;
  teamId: string;
  datasetId: string;
}): {
  q: string;
  a?: string;
  imagePreivewUrl?: string;
} => {
  // Plain text item: nothing to resolve.
  if (!imageId) return { q, a };

  const sevenDaysInMinutes = 60 * 24 * 7;
  const imagePreivewUrl = getDatasetImagePreviewUrl({
    imageId,
    teamId,
    datasetId,
    expiredMinutes: sevenDaysInMinutes
  });

  return { q: ``, a, imagePreivewUrl };
};
|
||||
|
||||
/**
 * Convert stored dataset data documents into quote items.
 *
 * Each item keeps its `_id`, `history` and `updateTime`, exposes `chunkIndex`
 * as `index`, and takes its `q` / `a` / `imagePreivewUrl` from
 * `formatDatasetDataValue`.
 */
export const getFormatDatasetCiteList = (list: DatasetDataSchemaType[]) => {
  const toQuoteItem = (item: DatasetDataSchemaType): QuoteDataItemType => {
    const { teamId, datasetId, q, a, imageId } = item;

    return {
      _id: item._id,
      ...formatDatasetDataValue({ teamId, datasetId, q, a, imageId }),
      history: item.history,
      updateTime: item.updateTime,
      index: item.chunkIndex
    };
  };

  return list.map<QuoteDataItemType>((item) => toQuoteItem(item));
};
|
||||
@ -37,8 +37,7 @@ const DatasetDataSchema = new Schema({
|
||||
required: true
|
||||
},
|
||||
a: {
|
||||
type: String,
|
||||
default: ''
|
||||
type: String
|
||||
},
|
||||
history: {
|
||||
type: [
|
||||
@ -74,6 +73,9 @@ const DatasetDataSchema = new Schema({
|
||||
default: []
|
||||
},
|
||||
|
||||
imageId: {
|
||||
type: String
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
|
||||
166
packages/service/core/dataset/image/controller.ts
Normal file
166
packages/service/core/dataset/image/controller.ts
Normal file
@ -0,0 +1,166 @@
|
||||
import { addMinutes } from 'date-fns';
|
||||
import { bucketName, MongoDatasetImageSchema } from './schema';
|
||||
import { connectionMongo, Types } from '../../../common/mongo';
|
||||
import fs from 'fs';
|
||||
import type { FileType } from '../../../common/file/multer';
|
||||
import fsp from 'fs/promises';
|
||||
import { computeGridFsChunSize } from '../../../common/file/gridfs/utils';
|
||||
import { setCron } from '../../../common/system/cron';
|
||||
import { checkTimerLock } from '../../../common/system/timerLock/utils';
|
||||
import { TimerIdEnum } from '../../../common/system/timerLock/constants';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
|
||||
/** Open a GridFS bucket named `bucketName` on the current mongoose connection's database. */
const getGridBucket = () => {
  const { GridFSBucket } = connectionMongo.mongo;
  const db = connectionMongo.connection.db!;

  return new GridFSBucket(db, { bucketName });
};
|
||||
|
||||
/**
 * Stream an uploaded image from disk into the dataset image GridFS bucket.
 *
 * `teamId`, `datasetId` and `expiredTime` are stored as the file's metadata.
 * The temporary file at `file.path` is only read here, never removed.
 *
 * @param teamId - owner team, stored as a string in the metadata
 * @param datasetId - dataset the image belongs to, stored as a string in the metadata
 * @param file - multer file; `path`, `originalname` and `mimetype` are used
 * @param expiredTime - stored as `metadata.expiredTime`; defaults to 30 minutes from the call
 * @returns the GridFS file id as `imageId`; `previewUrl` is always an empty string
 * @throws rejects with a string when `file.path` is not a regular file, or with the
 *   underlying error when reading the file or writing to GridFS fails
 */
export const createDatasetImage = async ({
  teamId,
  datasetId,
  file,
  expiredTime = addMinutes(new Date(), 30)
}: {
  teamId: string;
  datasetId: string;
  file: FileType;
  expiredTime?: Date;
}): Promise<{ imageId: string; previewUrl: string }> => {
  const path = file.path;
  const gridBucket = getGridBucket();
  const metadata = {
    teamId: String(teamId),
    datasetId: String(datasetId),
    expiredTime
  };

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  const readStream = fs.createReadStream(path, {
    highWaterMark: 256 * 1024
  });
  const chunkSizeBytes = computeGridFsChunSize(stats.size);

  const stream = gridBucket.openUploadStream(file.originalname, {
    metadata,
    contentType: file.mimetype,
    chunkSizeBytes
  });

  // save to gridfs
  await new Promise<void>((resolve, reject) => {
    // `pipe()` does not forward errors from the source stream. Without this
    // listener a read failure would leave the promise pending forever.
    readStream.on('error', (error) => {
      // Best effort: drop the chunks that were already written for this upload.
      void Promise.resolve()
        .then(() => stream.abort())
        .catch(() => undefined);
      reject(error);
    });

    readStream
      .pipe(stream as any)
      .on('finish', () => resolve())
      .on('error', reject);
  });

  return {
    imageId: String(stream.id),
    previewUrl: ''
  };
};
|
||||
|
||||
/**
 * Look up a dataset image and open a GridFS download stream for it.
 *
 * @param imageId - hex string of the GridFS file id
 * @returns `stream` (GridFS download stream) and `fileInfo` (the lean file document)
 * @throws rejects with `'Image not found'` when no file document has that id
 */
export const getDatasetImageReadData = async (imageId: string) => {
  const objectId = new Types.ObjectId(imageId);

  // The file document carries the metadata callers need (e.g. contentType).
  const fileInfo = await MongoDatasetImageSchema.findOne({ _id: objectId }).lean();
  if (!fileInfo) {
    return Promise.reject('Image not found');
  }

  const stream = getGridBucket().openDownloadStream(objectId);

  return { stream, fileInfo };
};
|
||||
/**
 * Read a dataset image from GridFS and return it as a base64 data URL.
 *
 * The file document and download stream both come from `getDatasetImageReadData`,
 * which rejects with `'Image not found'` for an unknown id, so no separate lookup
 * is needed here.
 *
 * @param imageId - hex string of the GridFS file id
 * @returns `data:<contentType>;base64,<payload>`; the content type falls back to
 *   `image/jpeg` when the file document has none
 * @throws rejects when the image does not exist or the download stream errors
 */
export const getDatasetImageBase64 = async (imageId: string) => {
  const { stream, fileInfo } = await getDatasetImageReadData(imageId);

  // Collect the whole stream in memory before encoding it.
  const chunks: Buffer[] = [];

  return new Promise<string>((resolve, reject) => {
    stream.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
    });

    stream.on('end', () => {
      const base64 = Buffer.concat(chunks).toString('base64');
      const dataUrl = `data:${fileInfo.contentType || 'image/jpeg'};base64,${base64}`;
      resolve(dataUrl);
    });

    stream.on('error', reject);
  });
};
|
||||
|
||||
/**
 * Delete one image (file document and chunks) from the dataset image GridFS bucket.
 *
 * A "File not found" error from GridFS is logged as a warning and swallowed, so
 * deleting an already-removed image resolves normally. Any other error is rethrown.
 *
 * @param imageId - hex string of the GridFS file id
 */
export const deleteDatasetImage = async (imageId: string) => {
  const gridBucket = getGridBucket();

  try {
    await gridBucket.delete(new Types.ObjectId(imageId));
  } catch (error: any) {
    // The caught value may not be an Error or may lack a message. Never call
    // `.includes` on undefined, which would mask the original failure.
    const msg = typeof error?.message === 'string' ? error.message : '';

    if (msg.includes('File not found')) {
      addLog.warn('Delete dataset image error', error);
      return;
    }

    return Promise.reject(error);
  }
};
|
||||
|
||||
/**
 * Register the cron job (`*/10 * * * *`) that removes expired dataset images.
 *
 * On each tick the job runs only if `checkTimerLock` returns a truthy value
 * (presumably a cross-instance lock held for 9 minutes — verify against its
 * implementation). It then deletes every GridFS file whose
 * `metadata.expiredTime` is earlier than now. A failure on one file is logged
 * and does not stop the remaining deletions.
 */
export const clearExpiredDatasetImageCron = async () => {
  const gridBucket = getGridBucket();

  const clearExpiredDatasetImages = async () => {
    addLog.debug('Clear expired dataset image start');

    const expiredImages = await MongoDatasetImageSchema.find(
      { 'metadata.expiredTime': { $lt: new Date() } },
      '_id'
    ).lean();

    // Delete one by one so a single failure does not abort the sweep.
    for (const { _id } of expiredImages) {
      try {
        await gridBucket.delete(_id);
      } catch (error) {
        addLog.error('Delete expired dataset image error', error);
      }
    }

    addLog.debug('Clear expired dataset image end');
  };

  setCron('*/10 * * * *', async () => {
    const canRun = await checkTimerLock({
      timerId: TimerIdEnum.clearExpiredDatasetImage,
      lockMinuted: 9
    });
    if (!canRun) return;

    try {
      await clearExpiredDatasetImages();
    } catch (error) {
      addLog.error('clearExpiredDatasetImageCron error', error);
    }
  });
};
|
||||
36
packages/service/core/dataset/image/schema.ts
Normal file
36
packages/service/core/dataset/image/schema.ts
Normal file
@ -0,0 +1,36 @@
|
||||
import type { Types } from '../../../common/mongo';
|
||||
import { getMongoModel, Schema } from '../../../common/mongo';
|
||||
|
||||
// Name of the GridFS bucket that stores dataset images.
export const bucketName = 'dataset_image';

// Shape of a document in the `<bucketName>.files` collection.
// NOTE(review): `removeDatasetImageExpiredTime` unsets `metadata.expiredTime`, and
// `collectionId` is optional in the schema, yet both are typed as always present —
// confirm whether these should be optional.
type DatasetImageFileSchemaType = {
  _id: Types.ObjectId;
  length: number;
  chunkSize: number;
  uploadDate: Date;
  filename: string;
  contentType: string;
  metadata: {
    teamId: string;
    datasetId: string;
    collectionId: string;
    expiredTime: Date;
  };
};

const MongoDatasetImage = new Schema({
  length: { type: Number, required: true },
  chunkSize: { type: Number, required: true },
  uploadDate: { type: Date, required: true },
  filename: { type: String, required: true },
  contentType: { type: String, required: true },
  metadata: {
    teamId: { type: String, required: true },
    datasetId: { type: String, required: true },
    collectionId: { type: String },
    expiredTime: { type: Date, required: true }
  }
});

// Lookups by dataset / collection (bulk clean-up) and by expiry (cron sweep).
MongoDatasetImage.index({ 'metadata.datasetId': 'hashed' });
MongoDatasetImage.index({ 'metadata.collectionId': 'hashed' });
MongoDatasetImage.index({ 'metadata.expiredTime': -1 });

// Model registered under the name `dataset_image.files`.
export const MongoDatasetImageSchema = getMongoModel<DatasetImageFileSchemaType>(
  `${bucketName}.files`,
  MongoDatasetImage
);
|
||||
101
packages/service/core/dataset/image/utils.ts
Normal file
101
packages/service/core/dataset/image/utils.ts
Normal file
@ -0,0 +1,101 @@
|
||||
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
|
||||
import { Types, type ClientSession } from '../../../common/mongo';
|
||||
import { deleteDatasetImage } from './controller';
|
||||
import { MongoDatasetImageSchema } from './schema';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import jwt from 'jsonwebtoken';
|
||||
|
||||
/**
 * Make uploaded images permanent and bind them to a collection.
 *
 * For every valid id in `ids`, unsets `metadata.expiredTime` (the field the
 * expiry cron filters on) and sets `metadata.collectionId`. Ids that are not
 * valid ObjectIds are ignored. Does nothing when `ids` is empty.
 *
 * @param ids - image ids (GridFS file ids) to update
 * @param collectionId - collection the images now belong to
 * @param session - optional mongo session passed through to the update
 */
export const removeDatasetImageExpiredTime = async ({
  ids = [],
  collectionId,
  session
}: {
  ids?: string[];
  collectionId: string;
  session?: ClientSession;
}) => {
  if (ids.length === 0) return;

  const objectIds = ids
    .filter((id) => Types.ObjectId.isValid(id))
    .map((id) => (typeof id === 'string' ? new Types.ObjectId(id) : id));

  const filter = { _id: { $in: objectIds } };
  const update = {
    $unset: { 'metadata.expiredTime': '' },
    $set: {
      'metadata.collectionId': String(collectionId)
    }
  };

  return MongoDatasetImageSchema.updateMany(filter, update, { session });
};
|
||||
|
||||
/**
 * Build a time-limited preview URL for a dataset image.
 *
 * A JWT carrying `teamId`, `datasetId` and an `exp` claim (now + `expiredMinutes`,
 * in seconds) is signed with `FILE_TOKEN_KEY` (fallback `'filetoken'`) and appended
 * as the `token` query parameter.
 *
 * NOTE(review): `imageId` only appears in the path and is not part of the signed
 * payload, so the token is scoped to team and dataset rather than to one image.
 *
 * @returns a relative URL of the form `/api/core/dataset/image/<imageId>?token=<jwt>`
 */
export const getDatasetImagePreviewUrl = ({
  imageId,
  teamId,
  datasetId,
  expiredMinutes
}: {
  imageId: string;
  teamId: string;
  datasetId: string;
  expiredMinutes: number;
}) => {
  const expireAt = addMinutes(new Date(), expiredMinutes);
  const exp = Math.floor(expireAt.getTime() / 1000);

  const secret = (process.env.FILE_TOKEN_KEY as string) ?? 'filetoken';
  const payload = {
    teamId: String(teamId),
    datasetId: String(datasetId),
    exp
  };
  const token = jwt.sign(payload, secret);

  return `/api/core/dataset/image/${imageId}?token=${token}`;
};
|
||||
/**
 * Verify a dataset image preview token.
 *
 * Resolves with the `teamId` and `datasetId` stored in the token. Rejects with
 * `ERROR_ENUM.unAuthFile` when the token is missing, fails verification, or
 * lacks either field.
 */
export const authDatasetImagePreviewUrl = (token?: string) =>
  new Promise<{
    teamId: string;
    datasetId: string;
  }>((resolve, reject) => {
    if (!token) {
      return reject(ERROR_ENUM.unAuthFile);
    }

    const secret = (process.env.FILE_TOKEN_KEY as string) ?? 'filetoken';

    jwt.verify(token, secret, (err, decoded: any) => {
      const isAuthorized = !err && !!decoded?.teamId && !!decoded?.datasetId;
      if (!isAuthorized) {
        reject(ERROR_ENUM.unAuthFile);
        return;
      }

      const { teamId, datasetId } = decoded;
      resolve({ teamId, datasetId });
    });
  });
|
||||
|
||||
/**
 * Delete every stored image whose `metadata.datasetId` is one of `datasetIds`.
 * All deletions are started together and awaited as a group.
 */
export const clearDatasetImages = async (datasetIds: string[]) => {
  const datasetIdList = datasetIds.map((item) => String(item));

  const images = await MongoDatasetImageSchema.find(
    { 'metadata.datasetId': { $in: datasetIdList } },
    '_id'
  ).lean();

  const deletions = images.map(({ _id }) => deleteDatasetImage(String(_id)));
  await Promise.all(deletions);
};
|
||||
|
||||
/**
 * Delete every stored image whose `metadata.collectionId` is one of `collectionIds`.
 * All deletions are started together and awaited as a group.
 */
export const clearCollectionImages = async (collectionIds: string[]) => {
  const collectionIdList = collectionIds.map((item) => String(item));

  const images = await MongoDatasetImageSchema.find(
    { 'metadata.collectionId': { $in: collectionIdList } },
    '_id'
  ).lean();

  const deletions = images.map(({ _id }) => deleteDatasetImage(String(_id)));
  await Promise.all(deletions);
};
|
||||
@ -186,9 +186,11 @@ export const rawText2Chunks = ({
|
||||
chunkTriggerMinSize = 1000,
|
||||
backupParse,
|
||||
chunkSize = 512,
|
||||
imageIdList,
|
||||
...splitProps
|
||||
}: {
|
||||
rawText: string;
|
||||
imageIdList?: string[];
|
||||
|
||||
chunkTriggerType?: ChunkTriggerConfigTypeEnum;
|
||||
chunkTriggerMinSize?: number; // maxSize from agent model, not store
|
||||
@ -199,6 +201,7 @@ export const rawText2Chunks = ({
|
||||
q: string;
|
||||
a: string;
|
||||
indexes?: string[];
|
||||
imageIdList?: string[];
|
||||
}[] => {
|
||||
const parseDatasetBackup2Chunks = (rawText: string) => {
|
||||
const csvArr = Papa.parse(rawText).data as string[][];
|
||||
@ -209,7 +212,8 @@ export const rawText2Chunks = ({
|
||||
.map((item) => ({
|
||||
q: item[0] || '',
|
||||
a: item[1] || '',
|
||||
indexes: item.slice(2)
|
||||
indexes: item.slice(2),
|
||||
imageIdList
|
||||
}))
|
||||
.filter((item) => item.q || item.a);
|
||||
|
||||
@ -231,7 +235,8 @@ export const rawText2Chunks = ({
|
||||
return [
|
||||
{
|
||||
q: rawText,
|
||||
a: ''
|
||||
a: '',
|
||||
imageIdList
|
||||
}
|
||||
];
|
||||
}
|
||||
@ -240,7 +245,7 @@ export const rawText2Chunks = ({
|
||||
if (chunkTriggerType !== ChunkTriggerConfigTypeEnum.forceChunk) {
|
||||
const textLength = rawText.trim().length;
|
||||
if (textLength < chunkTriggerMinSize) {
|
||||
return [{ q: rawText, a: '' }];
|
||||
return [{ q: rawText, a: '', imageIdList }];
|
||||
}
|
||||
}
|
||||
|
||||
@ -253,6 +258,7 @@ export const rawText2Chunks = ({
|
||||
return chunks.map((item) => ({
|
||||
q: item,
|
||||
a: '',
|
||||
indexes: []
|
||||
indexes: [],
|
||||
imageIdList
|
||||
}));
|
||||
};
|
||||
|
||||
@ -28,6 +28,7 @@ import type { NodeInputKeyEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import { datasetSearchQueryExtension } from './utils';
|
||||
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import { formatDatasetDataValue } from '../data/controller';
|
||||
|
||||
export type SearchDatasetDataProps = {
|
||||
histories: ChatItemType[];
|
||||
@ -175,6 +176,12 @@ export async function searchDatasetData(
|
||||
collectionFilterMatch
|
||||
} = props;
|
||||
|
||||
// Constants data
|
||||
const datasetDataSelectField =
|
||||
'_id datasetId collectionId updateTime q a imageId chunkIndex indexes';
|
||||
const datsaetCollectionSelectField =
|
||||
'_id name fileId rawLink apiFileId externalFileId externalFileUrl';
|
||||
|
||||
/* init params */
|
||||
searchMode = DatasetSearchModeMap[searchMode] ? searchMode : DatasetSearchModeEnum.embedding;
|
||||
usingReRank = usingReRank && !!getDefaultRerankModel();
|
||||
@ -463,14 +470,14 @@ export async function searchDatasetData(
|
||||
collectionId: { $in: collectionIdList },
|
||||
'indexes.dataId': { $in: results.map((item) => item.id?.trim()) }
|
||||
},
|
||||
'_id datasetId collectionId updateTime q a chunkIndex indexes',
|
||||
datasetDataSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean(),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: collectionIdList }
|
||||
},
|
||||
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
|
||||
datsaetCollectionSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean()
|
||||
]);
|
||||
@ -494,8 +501,13 @@ export async function searchDatasetData(
|
||||
const result: SearchDataResponseItemType = {
|
||||
id: String(data._id),
|
||||
updateTime: data.updateTime,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
...formatDatasetDataValue({
|
||||
teamId,
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
@ -597,14 +609,14 @@ export async function searchDatasetData(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.dataId) }
|
||||
},
|
||||
'_id datasetId collectionId updateTime q a chunkIndex indexes',
|
||||
datasetDataSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean(),
|
||||
MongoDatasetCollection.find(
|
||||
{
|
||||
_id: { $in: searchResults.map((item) => item.collectionId) }
|
||||
},
|
||||
'_id name fileId rawLink apiFileId externalFileId externalFileUrl',
|
||||
datsaetCollectionSelectField,
|
||||
{ ...readFromSecondary }
|
||||
).lean()
|
||||
]);
|
||||
@ -630,8 +642,13 @@ export async function searchDatasetData(
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId),
|
||||
updateTime: data.updateTime,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
...formatDatasetDataValue({
|
||||
teamId,
|
||||
datasetId: data.datasetId,
|
||||
q: data.q,
|
||||
a: data.a,
|
||||
imageId: data.imageId
|
||||
}),
|
||||
chunkIndex: data.chunkIndex,
|
||||
indexes: data.indexes,
|
||||
...getCollectionSourceData(collection),
|
||||
|
||||
@ -12,10 +12,7 @@ import { getCollectionWithDataset } from '../controller';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
import { type PushDataToTrainingQueueProps } from '@fastgpt/global/core/dataset/training/type';
|
||||
import { i18nT } from '../../../../web/i18n/utils';
|
||||
import {
|
||||
getLLMDefaultChunkSize,
|
||||
getLLMMaxChunkSize
|
||||
} from '../../../../global/core/dataset/training/utils';
|
||||
import { getLLMMaxChunkSize } from '../../../../global/core/dataset/training/utils';
|
||||
|
||||
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
|
||||
try {
|
||||
@ -65,7 +62,7 @@ export async function pushDataListToTrainingQueue({
|
||||
const getImageChunkMode = (data: PushDatasetDataChunkProps, mode: TrainingModeEnum) => {
|
||||
if (mode !== TrainingModeEnum.image) return mode;
|
||||
// 检查内容中,是否包含  的图片格式
|
||||
const text = data.q + data.a || '';
|
||||
const text = (data.q || '') + (data.a || '');
|
||||
const regex = /!\[\]\((.*?)\)/g;
|
||||
const match = text.match(regex);
|
||||
if (match) {
|
||||
@ -82,9 +79,6 @@ export async function pushDataListToTrainingQueue({
|
||||
if (!agentModelData) {
|
||||
return Promise.reject(i18nT('common:error_llm_not_config'));
|
||||
}
|
||||
if (mode === TrainingModeEnum.chunk || mode === TrainingModeEnum.auto) {
|
||||
prompt = undefined;
|
||||
}
|
||||
|
||||
const { model, maxToken, weight } = await (async () => {
|
||||
if (mode === TrainingModeEnum.chunk) {
|
||||
@ -101,7 +95,7 @@ export async function pushDataListToTrainingQueue({
|
||||
weight: 0
|
||||
};
|
||||
}
|
||||
if (mode === TrainingModeEnum.image) {
|
||||
if (mode === TrainingModeEnum.image || mode === TrainingModeEnum.imageParse) {
|
||||
const vllmModelData = getVlmModel(vlmModel);
|
||||
if (!vllmModelData) {
|
||||
return Promise.reject(i18nT('common:error_vlm_not_config'));
|
||||
@ -117,11 +111,9 @@ export async function pushDataListToTrainingQueue({
|
||||
})();
|
||||
|
||||
// filter repeat or equal content
|
||||
const set = new Set();
|
||||
const filterResult: Record<string, PushDatasetDataChunkProps[]> = {
|
||||
success: [],
|
||||
overToken: [],
|
||||
repeat: [],
|
||||
error: []
|
||||
};
|
||||
|
||||
@ -140,7 +132,7 @@ export async function pushDataListToTrainingQueue({
|
||||
.filter(Boolean);
|
||||
|
||||
// filter repeat content
|
||||
if (!item.q) {
|
||||
if (!item.imageId && !item.q) {
|
||||
filterResult.error.push(item);
|
||||
return;
|
||||
}
|
||||
@ -153,32 +145,26 @@ export async function pushDataListToTrainingQueue({
|
||||
return;
|
||||
}
|
||||
|
||||
if (set.has(text)) {
|
||||
filterResult.repeat.push(item);
|
||||
} else {
|
||||
filterResult.success.push(item);
|
||||
set.add(text);
|
||||
}
|
||||
filterResult.success.push(item);
|
||||
});
|
||||
|
||||
// insert data to db
|
||||
const insertLen = filterResult.success.length;
|
||||
const failedDocuments: PushDatasetDataChunkProps[] = [];
|
||||
|
||||
// 使用 insertMany 批量插入
|
||||
const batchSize = 200;
|
||||
const batchSize = 500;
|
||||
const insertData = async (startIndex: number, session: ClientSession) => {
|
||||
const list = filterResult.success.slice(startIndex, startIndex + batchSize);
|
||||
|
||||
if (list.length === 0) return;
|
||||
|
||||
try {
|
||||
await MongoDatasetTraining.insertMany(
|
||||
const result = await MongoDatasetTraining.insertMany(
|
||||
list.map((item) => ({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
datasetId: datasetId,
|
||||
collectionId: collectionId,
|
||||
billId,
|
||||
mode: getImageChunkMode(item, mode),
|
||||
prompt,
|
||||
@ -189,25 +175,25 @@ export async function pushDataListToTrainingQueue({
|
||||
indexSize,
|
||||
weight: weight ?? 0,
|
||||
indexes: item.indexes,
|
||||
retryCount: 5
|
||||
retryCount: 5,
|
||||
...(item.imageId ? { imageId: item.imageId } : {})
|
||||
})),
|
||||
{
|
||||
session,
|
||||
ordered: true
|
||||
ordered: false,
|
||||
rawResult: true,
|
||||
includeResultMetadata: false // 进一步减少返回数据
|
||||
}
|
||||
);
|
||||
|
||||
if (result.insertedCount !== list.length) {
|
||||
return Promise.reject(`Insert data error, ${JSON.stringify(result)}`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
addLog.error(`Insert error`, error);
|
||||
// 如果有错误,将失败的文档添加到失败列表中
|
||||
error.writeErrors?.forEach((writeError: any) => {
|
||||
failedDocuments.push(data[writeError.index]);
|
||||
});
|
||||
console.log('failed', failedDocuments);
|
||||
return Promise.reject(error);
|
||||
}
|
||||
|
||||
// 对于失败的文档,尝试单独插入
|
||||
await MongoDatasetTraining.create(failedDocuments, { session });
|
||||
|
||||
return insertData(startIndex + batchSize, session);
|
||||
};
|
||||
|
||||
@ -222,7 +208,6 @@ export async function pushDataListToTrainingQueue({
|
||||
delete filterResult.success;
|
||||
|
||||
return {
|
||||
insertLen,
|
||||
...filterResult
|
||||
insertLen
|
||||
};
|
||||
}
|
||||
|
||||
@ -99,6 +99,9 @@ const TrainingDataSchema = new Schema({
|
||||
],
|
||||
default: []
|
||||
},
|
||||
imageId: {
|
||||
type: String
|
||||
},
|
||||
|
||||
errorMsg: String
|
||||
});
|
||||
|
||||
@ -358,7 +358,7 @@ async function filterDatasetQuote({
|
||||
return replaceVariable(quoteTemplate, {
|
||||
id: item.id,
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
a: item.a || '',
|
||||
updateTime: formatTime2YMDHM(item.updateTime),
|
||||
source: item.sourceName,
|
||||
sourceId: String(item.sourceId || ''),
|
||||
|
||||
@ -16,6 +16,7 @@ import { type AuthModeType, type AuthResponseType } from '../type';
|
||||
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { type ParentIdType } from '@fastgpt/global/common/parentFolder/type';
|
||||
import { DatasetDefaultPermissionVal } from '@fastgpt/global/support/permission/dataset/constant';
|
||||
import { getDatasetImagePreviewUrl } from '../../../core/dataset/image/utils';
|
||||
|
||||
export const authDatasetByTmbId = async ({
|
||||
tmbId,
|
||||
@ -267,6 +268,15 @@ export async function authDatasetData({
|
||||
updateTime: datasetData.updateTime,
|
||||
q: datasetData.q,
|
||||
a: datasetData.a,
|
||||
imageId: datasetData.imageId,
|
||||
imagePreivewUrl: datasetData.imageId
|
||||
? getDatasetImagePreviewUrl({
|
||||
imageId: datasetData.imageId,
|
||||
teamId: datasetData.teamId,
|
||||
datasetId: datasetData.datasetId,
|
||||
expiredMinutes: 30
|
||||
})
|
||||
: undefined,
|
||||
chunkIndex: datasetData.chunkIndex,
|
||||
indexes: datasetData.indexes,
|
||||
datasetId: String(datasetData.datasetId),
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { getWorkerController, WorkerNameEnum } from './utils';
|
||||
|
||||
export const preLoadWorker = async () => {
|
||||
const max = Number(global.systemEnv?.tokenWorkers || 30);
|
||||
const max = Math.min(Number(global.systemEnv?.tokenWorkers || 30), 100);
|
||||
const workerController = getWorkerController({
|
||||
name: WorkerNameEnum.countGptMessagesTokens,
|
||||
maxReservedThreads: max
|
||||
|
||||
@ -220,9 +220,11 @@ export const iconPaths = {
|
||||
import('./icons/core/dataset/feishuDatasetOutline.svg'),
|
||||
'core/dataset/fileCollection': () => import('./icons/core/dataset/fileCollection.svg'),
|
||||
'core/dataset/fullTextRecall': () => import('./icons/core/dataset/fullTextRecall.svg'),
|
||||
'core/dataset/imageFill': () => import('./icons/core/dataset/imageFill.svg'),
|
||||
'core/dataset/manualCollection': () => import('./icons/core/dataset/manualCollection.svg'),
|
||||
'core/dataset/mixedRecall': () => import('./icons/core/dataset/mixedRecall.svg'),
|
||||
'core/dataset/modeEmbedding': () => import('./icons/core/dataset/modeEmbedding.svg'),
|
||||
'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'),
|
||||
'core/dataset/questionExtension': () => import('./icons/core/dataset/questionExtension.svg'),
|
||||
'core/dataset/rerank': () => import('./icons/core/dataset/rerank.svg'),
|
||||
'core/dataset/searchfilter': () => import('./icons/core/dataset/searchfilter.svg'),
|
||||
@ -230,7 +232,6 @@ export const iconPaths = {
|
||||
'core/dataset/tableCollection': () => import('./icons/core/dataset/tableCollection.svg'),
|
||||
'core/dataset/tag': () => import('./icons/core/dataset/tag.svg'),
|
||||
'core/dataset/websiteDataset': () => import('./icons/core/dataset/websiteDataset.svg'),
|
||||
'core/dataset/otherDataset': () => import('./icons/core/dataset/otherDataset.svg'),
|
||||
'core/dataset/websiteDatasetColor': () => import('./icons/core/dataset/websiteDatasetColor.svg'),
|
||||
'core/dataset/websiteDatasetOutline': () =>
|
||||
import('./icons/core/dataset/websiteDatasetOutline.svg'),
|
||||
@ -379,10 +380,12 @@ export const iconPaths = {
|
||||
fullScreen: () => import('./icons/fullScreen.svg'),
|
||||
help: () => import('./icons/help.svg'),
|
||||
history: () => import('./icons/history.svg'),
|
||||
image: () => import('./icons/image.svg'),
|
||||
infoRounded: () => import('./icons/infoRounded.svg'),
|
||||
kbTest: () => import('./icons/kbTest.svg'),
|
||||
key: () => import('./icons/key.svg'),
|
||||
keyPrimary: () => import('./icons/keyPrimary.svg'),
|
||||
loading: () => import('./icons/loading.svg'),
|
||||
menu: () => import('./icons/menu.svg'),
|
||||
minus: () => import('./icons/minus.svg'),
|
||||
'modal/AddClb': () => import('./icons/modal/AddClb.svg'),
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 21 20" >
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M2.24348 4.15292C1.9165 4.79466 1.9165 5.63474 1.9165 7.31489V12.6852C1.9165 14.3654 1.9165 15.2054 2.24348 15.8472C2.5311 16.4117 2.99005 16.8706 3.55453 17.1582C4.19627 17.4852 5.03635 17.4852 6.7165 17.4852H13.7832C15.4633 17.4852 16.3034 17.4852 16.9451 17.1582C17.5096 16.8706 17.9686 16.4117 18.2562 15.8472C18.5832 15.2054 18.5832 14.3654 18.5832 12.6852V7.31489C18.5832 5.63473 18.5832 4.79466 18.2562 4.15292C17.9686 3.58843 17.5096 3.12949 16.9451 2.84187C16.3034 2.51489 15.4633 2.51489 13.7832 2.51489H6.7165C5.03635 2.51489 4.19627 2.51489 3.55453 2.84187C2.99005 3.12949 2.5311 3.58843 2.24348 4.15292ZM7.88951 6.75656C7.88951 7.67703 7.14331 8.42322 6.22284 8.42322C5.30236 8.42322 4.55617 7.67703 4.55617 6.75656C4.55617 5.83608 5.30236 5.08989 6.22284 5.08989C7.14331 5.08989 7.88951 5.83608 7.88951 6.75656ZM12.8631 8.65525C12.5376 8.32981 12.01 8.32981 11.6845 8.65525L5.92965 14.4101C5.40468 14.9351 5.77648 15.8327 6.5189 15.8327L15.5062 15.8327C16.4267 15.8327 17.1729 15.0865 17.1729 14.1661V13.3103C17.1729 13.0892 17.0851 12.8773 16.9288 12.721L12.8631 8.65525Z" fill="#3370FF"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.2 KiB |
4
packages/web/components/common/Icon/icons/image.svg
Normal file
4
packages/web/components/common/Icon/icons/image.svg
Normal file
@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 17 16" >
|
||||
<path d="M5.50794 6.8195C6.06022 6.8195 6.50794 6.37178 6.50794 5.8195C6.50794 5.26721 6.06022 4.8195 5.50794 4.8195C4.95565 4.8195 4.50794 5.26721 4.50794 5.8195C4.50794 6.37178 4.95565 6.8195 5.50794 6.8195Z" />
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M1.55029 5.85187C1.55029 4.50775 1.55029 3.83568 1.81188 3.32229C2.04197 2.87071 2.40913 2.50355 2.86072 2.27346C3.3741 2.01187 4.04617 2.01187 5.39029 2.01187H11.0436C12.3878 2.01187 13.0598 2.01187 13.5732 2.27346C14.0248 2.50355 14.3919 2.87071 14.622 3.32229C14.8836 3.83568 14.8836 4.50775 14.8836 5.85187V10.1481C14.8836 11.4922 14.8836 12.1643 14.622 12.6777C14.3919 13.1293 14.0248 13.4964 13.5732 13.7265C13.0598 13.9881 12.3878 13.9881 11.0436 13.9881H5.39029C4.04617 13.9881 3.3741 13.9881 2.86072 13.7265C2.40913 13.4964 2.04197 13.1293 1.81188 12.6777C1.55029 12.1643 1.55029 11.4922 1.55029 10.1481V5.85187ZM5.39029 3.3452H11.0436C11.7377 3.3452 12.1781 3.34624 12.5114 3.37347C12.8291 3.39944 12.9305 3.44241 12.9679 3.46146C13.1686 3.56373 13.3318 3.72691 13.434 3.92761C13.4531 3.96502 13.4961 4.06638 13.522 4.38413C13.5493 4.71745 13.5503 5.15781 13.5503 5.85187V10.1481C13.5503 10.1562 13.5503 10.1641 13.5503 10.1721L10.3165 6.93829C10.0561 6.67794 9.634 6.67794 9.37365 6.93829L3.70938 12.6026C3.5547 12.5791 3.49333 12.5524 3.46604 12.5385C3.26533 12.4363 3.10215 12.2731 2.99989 12.0724C2.98083 12.035 2.93786 11.9336 2.9119 11.6159C2.88466 11.2825 2.88363 10.8422 2.88363 10.1481V5.85187C2.88363 5.15781 2.88466 4.71745 2.9119 4.38413C2.93786 4.06638 2.98083 3.96502 2.99989 3.92761C3.10215 3.72691 3.26533 3.56373 3.46604 3.46146C3.50344 3.44241 3.6048 3.39944 3.92255 3.37347C4.25587 3.34624 4.69623 3.3452 5.39029 3.3452ZM9.84506 8.3525L5.54277 12.6548H11.0436C11.7377 12.6548 12.1781 12.6538 12.5114 12.6265C12.8291 12.6006 12.9305 12.5576 12.9679 12.5385C13.1686 12.4363 13.3318 12.2731 13.434 12.0724C13.4422 12.0563 13.4549 12.0283 13.4687 11.9762L9.84506 8.3525Z" />
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.0 KiB |
4
packages/web/components/common/Icon/icons/loading.svg
Normal file
4
packages/web/components/common/Icon/icons/loading.svg
Normal file
@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 48 48" >
|
||||
<path d="M47.3337 24C47.3337 36.8866 36.887 47.3333 24.0003 47.3333C11.1137 47.3333 0.666992 36.8866 0.666992 24C0.666992 11.1133 11.1137 0.666626 24.0003 0.666626C36.887 0.666626 47.3337 11.1133 47.3337 24ZM5.33366 24C5.33366 34.3093 13.691 42.6666 24.0003 42.6666C34.3096 42.6666 42.667 34.3093 42.667 24C42.667 13.6906 34.3096 5.33329 24.0003 5.33329C13.691 5.33329 5.33366 13.6906 5.33366 24Z" />
|
||||
<path d="M24.0003 2.99996C24.0003 1.71129 25.0476 0.654541 26.3298 0.783194C29.1026 1.06141 31.8097 1.83481 34.3204 3.07293C37.5303 4.6559 40.3331 6.95608 42.5119 9.79553C44.6907 12.635 46.1871 15.9376 46.8853 19.4479C47.4314 22.1934 47.4778 25.0084 47.0289 27.7588C46.8213 29.0306 45.5295 29.7687 44.2848 29.4352C43.04 29.1016 42.3169 27.8222 42.4926 26.5456C42.7752 24.4926 42.7147 22.4014 42.3083 20.3583C41.7497 17.5501 40.5526 14.908 38.8096 12.6364C37.0666 10.3649 34.8243 8.52471 32.2564 7.25833C30.3881 6.33698 28.3838 5.73731 26.3276 5.47894C25.049 5.31827 24.0003 4.28862 24.0003 2.99996Z" />
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.1 KiB |
@ -71,13 +71,13 @@
|
||||
"response_embedding_model_tokens": "Vector Model Tokens",
|
||||
"response_hybrid_weight": "Embedding : Full text = {{emb}} : {{text}}",
|
||||
"response_rerank_tokens": "Rearrange Model Tokens",
|
||||
"search_results": "Search results",
|
||||
"select": "Select",
|
||||
"select_file": "Upload File",
|
||||
"select_file_img": "Upload file / image",
|
||||
"select_img": "Upload Image",
|
||||
"source_cronJob": "Scheduled execution",
|
||||
"stream_output": "Stream Output",
|
||||
"to_dataset": "Go to the Knowledge Base",
|
||||
"unsupported_file_type": "Unsupported file types",
|
||||
"upload": "Upload",
|
||||
"variable_invisable_in_share": "Custom variables are not visible in login-free links",
|
||||
|
||||
@ -180,7 +180,7 @@
|
||||
"code_error.user_error.balance_not_enough": "Insufficient Account Balance",
|
||||
"code_error.user_error.bin_visitor_guest": "You Are Currently a Guest, Unauthorized to Operate",
|
||||
"code_error.user_error.un_auth_user": "User Not Found",
|
||||
"comfirm_import": "comfirm_import",
|
||||
"comfirm_import": "Confirm import",
|
||||
"comfirm_leave_page": "Confirm to Leave This Page?",
|
||||
"comfirn_create": "Confirm Creation",
|
||||
"commercial_function_tip": "Please Upgrade to the Commercial Version to Use This Feature: https://doc.fastgpt.cn/docs/commercial/intro/",
|
||||
@ -403,7 +403,6 @@
|
||||
"core.chat.response.module model": "Model",
|
||||
"core.chat.response.module name": "Model Name",
|
||||
"core.chat.response.module query": "Question/Search Term",
|
||||
"core.chat.response.module quoteList": "Quote Content",
|
||||
"core.chat.response.module similarity": "Similarity",
|
||||
"core.chat.response.module temperature": "Temperature",
|
||||
"core.chat.response.module time": "Run Time",
|
||||
@ -434,7 +433,6 @@
|
||||
"core.dataset.Text collection": "Text Dataset",
|
||||
"core.dataset.apiFile": "API File",
|
||||
"core.dataset.collection.Click top config website": "Click to Configure Website",
|
||||
"core.dataset.collection.Collection name": "Dataset Name",
|
||||
"core.dataset.collection.Collection raw text": "Dataset Content",
|
||||
"core.dataset.collection.Empty Tip": "The Dataset is Empty",
|
||||
"core.dataset.collection.QA Prompt": "QA Split Prompt",
|
||||
@ -451,7 +449,6 @@
|
||||
"core.dataset.collection.metadata.metadata": "Metadata",
|
||||
"core.dataset.collection.metadata.read source": "View Original Content",
|
||||
"core.dataset.collection.metadata.source": "Data Source",
|
||||
"core.dataset.collection.metadata.source name": "Source Name",
|
||||
"core.dataset.collection.metadata.source size": "Source Size",
|
||||
"core.dataset.collection.status.active": "Ready",
|
||||
"core.dataset.collection.status.error": "Error",
|
||||
@ -743,7 +740,7 @@
|
||||
"core.workflow.value": "Value",
|
||||
"core.workflow.variable": "Variable",
|
||||
"create": "Create",
|
||||
"create_failed": "Creation Failed",
|
||||
"create_failed": "Create failed",
|
||||
"create_success": "Created Successfully",
|
||||
"create_time": "Creation Time",
|
||||
"cron_job_run_app": "Scheduled Task",
|
||||
@ -788,7 +785,6 @@
|
||||
"dataset.dataset_name": "Dataset Name",
|
||||
"dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
|
||||
"dataset.test.noResult": "No Search Results",
|
||||
"dataset_data_import_q_placeholder": "Up to {{maxToken}} words.",
|
||||
"dataset_data_input_a": "Answer",
|
||||
"dataset_data_input_chunk": "Chunk",
|
||||
"dataset_data_input_chunk_content": "Chunk",
|
||||
@ -802,7 +798,6 @@
|
||||
"delete_success": "Deleted Successfully",
|
||||
"delete_warning": "Deletion Warning",
|
||||
"embedding_model_not_config": "No index model is detected",
|
||||
"error.Create failed": "Create failed",
|
||||
"error.code_error": "Verification code error",
|
||||
"error.fileNotFound": "File not found~",
|
||||
"error.inheritPermissionError": "Inherit permission Error",
|
||||
@ -1208,6 +1203,7 @@
|
||||
"templateTags.Writing": "Writing",
|
||||
"template_market": "Template Market",
|
||||
"textarea_variable_picker_tip": "Enter \"/\" to select a variable",
|
||||
"to_dataset": "To dataset",
|
||||
"ui.textarea.Magnifying": "Magnifying",
|
||||
"un_used": "Unused",
|
||||
"unauth_token": "The certificate has expired, please log in again",
|
||||
|
||||
@ -28,16 +28,21 @@
|
||||
"collection.training_type": "Chunk type",
|
||||
"collection_data_count": "Data amount",
|
||||
"collection_metadata_custom_pdf_parse": "PDF enhancement analysis",
|
||||
"collection_name": "Collection name",
|
||||
"collection_not_support_retraining": "This collection type does not support retuning parameters",
|
||||
"collection_not_support_sync": "This collection does not support synchronization",
|
||||
"collection_sync": "Sync data",
|
||||
"collection_sync_confirm_tip": "Confirm to start synchronizing data? \nThe system will pull the latest data for comparison. If the contents are different, a new collection will be created and the old collection will be deleted. Please confirm!",
|
||||
"collection_tags": "Collection Tags",
|
||||
"common.dataset.data.Input Error Tip": "[Image Dataset] Process error:",
|
||||
"common.error.unKnow": "Unknown error",
|
||||
"common_dataset": "General Dataset",
|
||||
"common_dataset_desc": "Building a knowledge base by importing files, web page links, or manual entry",
|
||||
"condition": "condition",
|
||||
"config_sync_schedule": "Configure scheduled synchronization",
|
||||
"confirm_import_images": "Total {{num}} | Confirm create",
|
||||
"confirm_to_rebuild_embedding_tip": "Are you sure you want to switch the index for the Dataset?\nSwitching the index is a significant operation that requires re-indexing all data in your Dataset, which may take a long time. Please ensure your account has sufficient remaining points.\n\nAdditionally, you need to update the applications that use this Dataset to avoid conflicts with other indexed model Datasets.",
|
||||
"core.dataset.Image collection": "Image dataset",
|
||||
"core.dataset.import.Adjust parameters": "Adjust parameters",
|
||||
"custom_data_process_params": "Custom",
|
||||
"custom_data_process_params_desc": "Customize data processing rules",
|
||||
@ -90,6 +95,7 @@
|
||||
"image_auto_parse": "Automatic image indexing",
|
||||
"image_auto_parse_tips": "Call VLM to automatically label the pictures in the document and generate additional search indexes",
|
||||
"image_training_queue": "Queue of image processing",
|
||||
"images_creating": "Creating",
|
||||
"immediate_sync": "Immediate Synchronization",
|
||||
"import.Auto mode Estimated Price Tips": "The text understanding model needs to be called, which requires more points: {{price}} points/1K tokens",
|
||||
"import.Embedding Estimated Price Tips": "Only use the index model and consume a small amount of AI points: {{price}} points/1K tokens",
|
||||
@ -104,6 +110,8 @@
|
||||
"index_size": "Index size",
|
||||
"index_size_tips": "When vectorized, the system will automatically further segment the blocks according to this size.",
|
||||
"input_required_field_to_select_baseurl": "Please enter the required information first",
|
||||
"insert_images": "Added pictures",
|
||||
"insert_images_success": "The new picture is successfully added, and you need to wait for the training to be completed before it will be displayed.",
|
||||
"is_open_schedule": "Enable scheduled synchronization",
|
||||
"keep_image": "Keep the picture",
|
||||
"loading": "Loading...",
|
||||
@ -135,6 +143,7 @@
|
||||
"process.Image_Index": "Image index generation",
|
||||
"process.Is_Ready": "Ready",
|
||||
"process.Is_Ready_Count": "{{count}} Group is ready",
|
||||
"process.Parse_Image": "Image analysis",
|
||||
"process.Parsing": "Parsing",
|
||||
"process.Vectorizing": "Index vectorization",
|
||||
"process.Waiting": "Queue",
|
||||
@ -179,13 +188,19 @@
|
||||
"training.Error": "{{count}} Group exception",
|
||||
"training.Normal": "Normal",
|
||||
"training_mode": "Chunk mode",
|
||||
"training_queue_tip": "Training queue status",
|
||||
"training_ready": "{{count}} Group",
|
||||
"uploading_progress": "Uploading: {{num}}%",
|
||||
"vector_model_max_tokens_tip": "Each chunk of data has a maximum length of 3000 tokens",
|
||||
"vector_training_queue": "Vector training queue",
|
||||
"vllm_model": "Image understanding model",
|
||||
"vlm_model_required_tooltip": "A Vision Language Model is required to create image collections",
|
||||
"vlm_model_required_warning": "Image datasets require a Vision Language Model (VLM) to be configured. Please add a model that supports image understanding in the model configuration first.",
|
||||
"waiting_for_training": "Waiting for training",
|
||||
"website_dataset": "Website Sync",
|
||||
"website_dataset_desc": "Build knowledge base by crawling web page data in batches",
|
||||
"website_info": "Website Information",
|
||||
"yuque_dataset": "Yuque Dataset",
|
||||
"yuque_dataset_config": "Yuque Dataset Config",
|
||||
"yuque_dataset_desc": "Can build a dataset using Yuque documents by configuring permissions, without secondary storage"
|
||||
"yuque_dataset": "Yuque Knowledge Base",
|
||||
"yuque_dataset_config": "Configure Yuque Knowledge Base",
|
||||
"yuque_dataset_desc": "Build knowledge base using Yuque documents by configuring document permissions, documents will not be stored twice"
|
||||
}
|
||||
|
||||
@ -1,9 +1,32 @@
|
||||
{
|
||||
"Action": "Please select the image to upload",
|
||||
"All images import failed": "All pictures failed to import",
|
||||
"Dataset_ID_not_found": "The dataset ID does not exist",
|
||||
"Failed_to_get_token": "Failed to obtain the token",
|
||||
"Image_ID_copied": "Copy ID",
|
||||
"Image_Preview": "Picture preview",
|
||||
"Image_dataset_requires_VLM_model_to_be_configured": "The image dataset needs to be configured with the image understanding model (VLM) to be used. Please add a model that supports image understanding in the model configuration first.",
|
||||
"Image_does_not_belong_to_current_team": "The picture does not belong to the current team",
|
||||
"Image_file_does_not_exist": "The picture does not exist",
|
||||
"Loading_image": "Loading the picture...",
|
||||
"Loading_image failed": "Preview loading failed",
|
||||
"Only_support_uploading_one_image": "Only support uploading one image",
|
||||
"Please select the image to upload": "Please select the image to upload",
|
||||
"Please select the image to upload select the image to upload": "",
|
||||
"Please wait for all files to upload": "Please wait for all files to be uploaded to complete",
|
||||
"bucket_chat": "Conversation Files",
|
||||
"bucket_file": "Dataset Documents",
|
||||
"click_to_view_raw_source": "Click to View Original Source",
|
||||
"common.dataset_data_input_image_support_format": "Support .jpg, .jpeg, .png, .gif, .webp formats",
|
||||
"delete_image": "Delete pictures",
|
||||
"file_name": "Filename",
|
||||
"file_size": "Filesize",
|
||||
"image": "picture",
|
||||
"image_collection": "Picture collection",
|
||||
"image_description": "Image description",
|
||||
"image_description_tip": "Please enter the description of the picture",
|
||||
"please_upload_image_first": "Please upload the picture first",
|
||||
"reached_max_file_count": "Maximum file count reached",
|
||||
"release_the_mouse_to_upload_the_file": "Release Mouse to Upload File",
|
||||
"select_and_drag_file_tip": "Click or Drag Files Here to Upload",
|
||||
"select_file_amount_limit": "You can select up to {{max}} files",
|
||||
@ -12,7 +35,9 @@
|
||||
"support_file_type": "Supports {{fileType}} file types",
|
||||
"support_max_count": "Supports up to {{maxCount}} files",
|
||||
"support_max_size": "Maximum file size is {{maxSize}}",
|
||||
"total_files": "Total {{selectFiles.length}} files",
|
||||
"upload_error_description": "Only multiple files or a single folder can be uploaded at a time",
|
||||
"upload_failed": "Upload Failed",
|
||||
"reached_max_file_count": "Maximum file count reached",
|
||||
"upload_error_description": "Only multiple files or a single folder can be uploaded at a time"
|
||||
}
|
||||
"upload_file_error": "Please upload pictures",
|
||||
"uploading": "Uploading..."
|
||||
}
|
||||
|
||||
@ -71,13 +71,13 @@
|
||||
"response_embedding_model_tokens": "向量模型 Tokens",
|
||||
"response_hybrid_weight": "语义检索 : 全文检索 = {{emb}} : {{text}}",
|
||||
"response_rerank_tokens": "重排模型 Tokens",
|
||||
"search_results": "搜索结果",
|
||||
"select": "选择",
|
||||
"select_file": "上传文件",
|
||||
"select_file_img": "上传文件/图片",
|
||||
"select_img": "上传图片",
|
||||
"source_cronJob": "定时执行",
|
||||
"stream_output": "流输出",
|
||||
"to_dataset": "前往知识库",
|
||||
"unsupported_file_type": "不支持的文件类型",
|
||||
"upload": "上传",
|
||||
"variable_invisable_in_share": "自定义变量在免登录链接中不可见",
|
||||
|
||||
@ -403,7 +403,6 @@
|
||||
"core.chat.response.module model": "模型",
|
||||
"core.chat.response.module name": "模型名",
|
||||
"core.chat.response.module query": "问题/检索词",
|
||||
"core.chat.response.module quoteList": "引用内容",
|
||||
"core.chat.response.module similarity": "相似度",
|
||||
"core.chat.response.module temperature": "温度",
|
||||
"core.chat.response.module time": "运行时长",
|
||||
@ -434,7 +433,6 @@
|
||||
"core.dataset.Text collection": "文本数据集",
|
||||
"core.dataset.apiFile": "API 文件",
|
||||
"core.dataset.collection.Click top config website": "点击配置网站",
|
||||
"core.dataset.collection.Collection name": "数据集名称",
|
||||
"core.dataset.collection.Collection raw text": "数据集内容",
|
||||
"core.dataset.collection.Empty Tip": "数据集空空如也",
|
||||
"core.dataset.collection.QA Prompt": "QA 拆分引导词",
|
||||
@ -451,7 +449,6 @@
|
||||
"core.dataset.collection.metadata.metadata": "元数据",
|
||||
"core.dataset.collection.metadata.read source": "查看原始内容",
|
||||
"core.dataset.collection.metadata.source": "数据来源",
|
||||
"core.dataset.collection.metadata.source name": "来源名",
|
||||
"core.dataset.collection.metadata.source size": "来源大小",
|
||||
"core.dataset.collection.status.active": "已就绪",
|
||||
"core.dataset.collection.status.error": "训练异常",
|
||||
@ -743,7 +740,7 @@
|
||||
"core.workflow.value": "值",
|
||||
"core.workflow.variable": "变量",
|
||||
"create": "去创建",
|
||||
"create_failed": "创建异常",
|
||||
"create_failed": "创建失败",
|
||||
"create_success": "创建成功",
|
||||
"create_time": "创建时间",
|
||||
"cron_job_run_app": "定时任务",
|
||||
@ -788,7 +785,6 @@
|
||||
"dataset.dataset_name": "知识库名称",
|
||||
"dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
|
||||
"dataset.test.noResult": "搜索结果为空",
|
||||
"dataset_data_import_q_placeholder": "最多 {{maxToken}} 字。",
|
||||
"dataset_data_input_a": "答案",
|
||||
"dataset_data_input_chunk": "常规模式",
|
||||
"dataset_data_input_chunk_content": "内容",
|
||||
@ -802,7 +798,6 @@
|
||||
"delete_success": "删除成功",
|
||||
"delete_warning": "删除警告",
|
||||
"embedding_model_not_config": "检测到没有可用的索引模型",
|
||||
"error.Create failed": "创建失败",
|
||||
"error.code_error": "验证码错误",
|
||||
"error.fileNotFound": "文件找不到了~",
|
||||
"error.inheritPermissionError": "权限继承错误",
|
||||
@ -1208,6 +1203,7 @@
|
||||
"templateTags.Writing": "文本创作",
|
||||
"template_market": "模板市场",
|
||||
"textarea_variable_picker_tip": "输入\"/\"可选择变量",
|
||||
"to_dataset": "前往知识库",
|
||||
"ui.textarea.Magnifying": "放大",
|
||||
"un_used": "未使用",
|
||||
"unauth_token": "凭证已过期,请重新登录",
|
||||
|
||||
@ -28,16 +28,21 @@
|
||||
"collection.training_type": "处理模式",
|
||||
"collection_data_count": "数据量",
|
||||
"collection_metadata_custom_pdf_parse": "PDF增强解析",
|
||||
"collection_name": "数据集名称",
|
||||
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
|
||||
"collection_not_support_sync": "该集合不支持同步",
|
||||
"collection_sync": "立即同步",
|
||||
"collection_sync_confirm_tip": "确认开始同步数据?系统将会拉取最新数据进行比较,如果内容不相同,则会创建一个新的集合并删除旧的集合,请确认!",
|
||||
"collection_tags": "集合标签",
|
||||
"common.dataset.data.Input Error Tip": "[图片数据集] 处理过程错误:",
|
||||
"common.error.unKnow": "未知错误",
|
||||
"common_dataset": "通用知识库",
|
||||
"common_dataset_desc": "通过导入文件、网页链接或手动录入形式构建知识库",
|
||||
"condition": "条件",
|
||||
"config_sync_schedule": "配置定时同步",
|
||||
"confirm_import_images": "共 {{num}} 张图片 | 确认创建",
|
||||
"confirm_to_rebuild_embedding_tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作,需要对您知识库内所有数据进行重新索引,时间可能较长,请确保账号内剩余积分充足。\n\n此外,你还需要注意修改选择该知识库的应用,避免它们与其他索引模型知识库混用。",
|
||||
"core.dataset.Image collection": "图片数据集",
|
||||
"core.dataset.import.Adjust parameters": "调整参数",
|
||||
"custom_data_process_params": "自定义",
|
||||
"custom_data_process_params_desc": "自定义设置数据处理规则",
|
||||
@ -90,6 +95,7 @@
|
||||
"image_auto_parse": "图片自动索引",
|
||||
"image_auto_parse_tips": "调用 VLM 自动标注文档里的图片,并生成额外的检索索引",
|
||||
"image_training_queue": "图片处理排队",
|
||||
"images_creating": "正在创建",
|
||||
"immediate_sync": "立即同步",
|
||||
"import.Auto mode Estimated Price Tips": "需调用文本理解模型,需要消耗较多AI 积分:{{price}} 积分/1K tokens",
|
||||
"import.Embedding Estimated Price Tips": "仅使用索引模型,消耗少量 AI 积分:{{price}} 积分/1K tokens",
|
||||
@ -104,6 +110,8 @@
|
||||
"index_size": "索引大小",
|
||||
"index_size_tips": "向量化时内容的长度,系统会自动按该大小对分块进行进一步的分割。",
|
||||
"input_required_field_to_select_baseurl": "请先输入必填信息",
|
||||
"insert_images": "新增图片",
|
||||
"insert_images_success": "新增图片成功,需等待训练完成才会展示",
|
||||
"is_open_schedule": "启用定时同步",
|
||||
"keep_image": "保留图片",
|
||||
"loading": "加载中...",
|
||||
@ -135,6 +143,7 @@
|
||||
"process.Image_Index": "图片索引生成",
|
||||
"process.Is_Ready": "已就绪",
|
||||
"process.Is_Ready_Count": "{{count}} 组已就绪",
|
||||
"process.Parse_Image": "图片解析中",
|
||||
"process.Parsing": "内容解析中",
|
||||
"process.Vectorizing": "索引向量化",
|
||||
"process.Waiting": "排队中",
|
||||
@ -176,11 +185,14 @@
|
||||
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
|
||||
"total_num_files": "共 {{total}} 个文件",
|
||||
"training.Error": "{{count}} 组异常",
|
||||
"training.Image mode": "图片处理",
|
||||
"training.Normal": "正常",
|
||||
"training_mode": "处理方式",
|
||||
"training_ready": "{{count}} 组",
|
||||
"uploading_progress": "上传中: {{num}}%",
|
||||
"vector_model_max_tokens_tip": "每个分块数据,最大长度为 3000 tokens",
|
||||
"vllm_model": "图片理解模型",
|
||||
"vlm_model_required_warning": "需要图片理解模型",
|
||||
"website_dataset": "Web 站点同步",
|
||||
"website_dataset_desc": "通过爬虫,批量爬取网页数据构建知识库",
|
||||
"website_info": "网站信息",
|
||||
|
||||
@ -1,9 +1,33 @@
|
||||
{
|
||||
"Action": "请选择要上传的图片",
|
||||
"All images import failed": "所有图片导入失败",
|
||||
"Dataset_ID_not_found": "数据集ID不存在",
|
||||
"Failed_to_get_token": "获取令牌失败",
|
||||
"Image_ID_copied": "已复制ID",
|
||||
"Image_Preview": "图片预览",
|
||||
"Image_dataset_requires_VLM_model_to_be_configured": "图片数据集需要配置图片理解模型(VLM)才能使用,请先在模型配置中添加支持图片理解的模型",
|
||||
"Image_does_not_belong_to_current_team": "图片不属于当前团队",
|
||||
"Image_file_does_not_exist": "图片不存在",
|
||||
"Loading_image": "加载图片中...",
|
||||
"Loading_image failed": "预览加载失败",
|
||||
"Only_support_uploading_one_image": "仅支持上传一张图片",
|
||||
"image_description_tip": "请输入图片的描述内容",
|
||||
"Please select the image to upload": "请选择要上传的图片",
|
||||
"Please wait for all files to upload": "请等待所有文件上传完成",
|
||||
"bucket_chat": "对话文件",
|
||||
"bucket_file": "知识库文件",
|
||||
"click_to_view_raw_source": "点击查看来源",
|
||||
"common.Some images failed to process": "部分图片处理失败",
|
||||
"common.dataset_data_input_image_support_format": "支持 .jpg, .jpeg, .png, .gif, .webp 格式",
|
||||
"count.core.dataset.collection.Create Success": "成功导入 {{count}} 张图片",
|
||||
"delete_image": "删除图片",
|
||||
"file_name": "文件名",
|
||||
"file_size": "文件大小",
|
||||
"image": "图片",
|
||||
"image_collection": "图片集合",
|
||||
"image_description": "图片描述",
|
||||
"please_upload_image_first": "请先上传图片",
|
||||
"reached_max_file_count": "已达到最大文件数量",
|
||||
"release_the_mouse_to_upload_the_file": "松开鼠标上传文件",
|
||||
"select_and_drag_file_tip": "点击或拖动文件到此处上传",
|
||||
"select_file_amount_limit": "最多选择 {{max}} 个文件",
|
||||
@ -12,7 +36,9 @@
|
||||
"support_file_type": "支持 {{fileType}} 类型文件",
|
||||
"support_max_count": "最多支持 {{maxCount}} 个文件",
|
||||
"support_max_size": "单个文件最大 {{maxSize}}",
|
||||
"total_files": "共{{selectFiles.length}}个文件",
|
||||
"upload_error_description": "单次只支持上传多个文件或者一个文件夹",
|
||||
"upload_failed": "上传异常",
|
||||
"reached_max_file_count": "已达到最大文件数量",
|
||||
"upload_error_description": "单次只支持上传多个文件或者一个文件夹"
|
||||
}
|
||||
"upload_file_error": "请上传图片",
|
||||
"uploading": "正在上传..."
|
||||
}
|
||||
|
||||
@ -71,13 +71,13 @@
|
||||
"response_embedding_model_tokens": "向量模型 Tokens",
|
||||
"response_hybrid_weight": "語義檢索 : 全文檢索 = {{emb}} : {{text}}",
|
||||
"response_rerank_tokens": "重排模型 Tokens",
|
||||
"search_results": "搜索結果",
|
||||
"select": "選取",
|
||||
"select_file": "上傳檔案",
|
||||
"select_file_img": "上傳檔案 / 圖片",
|
||||
"select_img": "上傳圖片",
|
||||
"source_cronJob": "定時執行",
|
||||
"stream_output": "串流輸出",
|
||||
"to_dataset": "前往知識庫",
|
||||
"unsupported_file_type": "不支援的檔案類型",
|
||||
"upload": "上傳",
|
||||
"variable_invisable_in_share": "自定義變數在免登入連結中不可見",
|
||||
|
||||
@ -403,7 +403,6 @@
|
||||
"core.chat.response.module model": "模型",
|
||||
"core.chat.response.module name": "模型名稱",
|
||||
"core.chat.response.module query": "問題/搜尋詞",
|
||||
"core.chat.response.module quoteList": "引用內容",
|
||||
"core.chat.response.module similarity": "相似度",
|
||||
"core.chat.response.module temperature": "溫度",
|
||||
"core.chat.response.module time": "執行時長",
|
||||
@ -434,7 +433,6 @@
|
||||
"core.dataset.Text collection": "文字資料集",
|
||||
"core.dataset.apiFile": "API 檔案",
|
||||
"core.dataset.collection.Click top config website": "點選設定網站",
|
||||
"core.dataset.collection.Collection name": "資料集名稱",
|
||||
"core.dataset.collection.Collection raw text": "資料集內容",
|
||||
"core.dataset.collection.Empty Tip": "資料集是空的",
|
||||
"core.dataset.collection.QA Prompt": "問答拆分提示詞",
|
||||
@ -451,7 +449,6 @@
|
||||
"core.dataset.collection.metadata.metadata": "中繼資料",
|
||||
"core.dataset.collection.metadata.read source": "檢視原始內容",
|
||||
"core.dataset.collection.metadata.source": "資料來源",
|
||||
"core.dataset.collection.metadata.source name": "來源名稱",
|
||||
"core.dataset.collection.metadata.source size": "來源大小",
|
||||
"core.dataset.collection.status.active": "已就緒",
|
||||
"core.dataset.collection.status.error": "訓練異常",
|
||||
@ -555,7 +552,7 @@
|
||||
"core.dataset.training.Agent queue": "問答訓練排隊中",
|
||||
"core.dataset.training.Auto mode": "補充索引",
|
||||
"core.dataset.training.Auto mode Tip": "透過子索引以及呼叫模型產生相關問題與摘要,來增加資料區塊的語意豐富度,更有利於檢索。需要消耗更多的儲存空間並增加 AI 呼叫次數。",
|
||||
"core.dataset.training.Chunk mode": "分塊存儲",
|
||||
"core.dataset.training.Chunk mode": "分塊儲存",
|
||||
"core.dataset.training.Full": "預計 20 分鐘以上",
|
||||
"core.dataset.training.Leisure": "閒置",
|
||||
"core.dataset.training.QA mode": "問答對提取",
|
||||
@ -788,7 +785,6 @@
|
||||
"dataset.dataset_name": "知識庫名稱",
|
||||
"dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!",
|
||||
"dataset.test.noResult": "搜尋結果為空",
|
||||
"dataset_data_import_q_placeholder": "最多 {{maxToken}} 字。",
|
||||
"dataset_data_input_a": "答案",
|
||||
"dataset_data_input_chunk": "常規模式",
|
||||
"dataset_data_input_chunk_content": "內容",
|
||||
@ -802,7 +798,6 @@
|
||||
"delete_success": "刪除成功",
|
||||
"delete_warning": "刪除警告",
|
||||
"embedding_model_not_config": "偵測到沒有可用的索引模型",
|
||||
"error.Create failed": "建立失敗",
|
||||
"error.code_error": "驗證碼錯誤",
|
||||
"error.fileNotFound": "找不到檔案",
|
||||
"error.inheritPermissionError": "繼承權限錯誤",
|
||||
@ -1208,6 +1203,7 @@
|
||||
"templateTags.Writing": "文字創作",
|
||||
"template_market": "模板市場",
|
||||
"textarea_variable_picker_tip": "輸入「/」以選擇變數",
|
||||
"to_dataset": "前往知識庫",
|
||||
"ui.textarea.Magnifying": "放大",
|
||||
"un_used": "未使用",
|
||||
"unauth_token": "憑證已過期,請重新登入",
|
||||
|
||||
@ -26,16 +26,21 @@
|
||||
"collection.training_type": "處理模式",
|
||||
"collection_data_count": "資料量",
|
||||
"collection_metadata_custom_pdf_parse": "PDF 增強解析",
|
||||
"collection_name": "數據集名稱",
|
||||
"collection_not_support_retraining": "此集合類型不支援重新調整參數",
|
||||
"collection_not_support_sync": "該集合不支援同步",
|
||||
"collection_sync": "立即同步",
|
||||
"collection_sync_confirm_tip": "確認開始同步資料?\n系統將會拉取最新資料進行比較,如果內容不相同,則會建立一個新的集合並刪除舊的集合,請確認!",
|
||||
"collection_tags": "集合標籤",
|
||||
"common.dataset.data.Input Error Tip": "[圖片數據集] 處理過程錯誤:",
|
||||
"common.error.unKnow": "未知錯誤",
|
||||
"common_dataset": "通用資料集",
|
||||
"common_dataset_desc": "通過導入文件、網頁鏈接或手動錄入形式構建知識庫",
|
||||
"condition": "條件",
|
||||
"config_sync_schedule": "設定定時同步",
|
||||
"confirm_import_images": "共 {{num}} 張圖片 | 確認創建",
|
||||
"confirm_to_rebuild_embedding_tip": "確定要為資料集切換索引嗎?\n切換索引是一個重要的操作,需要對您資料集內所有資料重新建立索引,可能需要較長時間,請確保帳號內剩餘點數充足。\n\n此外,您還需要注意修改使用此資料集的應用程式,避免與其他索引模型資料集混用。",
|
||||
"core.dataset.Image collection": "圖片數據集",
|
||||
"core.dataset.import.Adjust parameters": "調整參數",
|
||||
"custom_data_process_params": "自訂",
|
||||
"custom_data_process_params_desc": "自訂資料處理規則",
|
||||
@ -88,6 +93,7 @@
|
||||
"image_auto_parse": "圖片自動索引",
|
||||
"image_auto_parse_tips": "呼叫 VLM 自動標註文件裡的圖片,並生成額外的檢索索引",
|
||||
"image_training_queue": "圖片處理排隊",
|
||||
"images_creating": "正在創建",
|
||||
"immediate_sync": "立即同步",
|
||||
"import.Auto mode Estimated Price Tips": "需呼叫文字理解模型,將消耗較多 AI 點數:{{price}} 點數 / 1K tokens",
|
||||
"import.Embedding Estimated Price Tips": "僅使用索引模型,消耗少量 AI 點數:{{price}} 點數 / 1K tokens",
|
||||
@ -102,6 +108,8 @@
|
||||
"index_size": "索引大小",
|
||||
"index_size_tips": "向量化時內容的長度,系統會自動按該大小對分塊進行進一步的分割。",
|
||||
"input_required_field_to_select_baseurl": "請先輸入必填信息",
|
||||
"insert_images": "新增圖片",
|
||||
"insert_images_success": "新增圖片成功,需等待訓練完成才會展示",
|
||||
"is_open_schedule": "啟用定時同步",
|
||||
"keep_image": "保留圖片",
|
||||
"loading": "加載中...",
|
||||
@ -133,6 +141,7 @@
|
||||
"process.Image_Index": "圖片索引生成",
|
||||
"process.Is_Ready": "已就緒",
|
||||
"process.Is_Ready_Count": "{{count}} 組已就緒",
|
||||
"process.Parse_Image": "圖片解析中",
|
||||
"process.Parsing": "內容解析中",
|
||||
"process.Vectorizing": "索引向量化",
|
||||
"process.Waiting": "排隊中",
|
||||
@ -174,11 +183,13 @@
|
||||
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "資料集有索引正在訓練或重建中",
|
||||
"total_num_files": "共 {{total}} 個文件",
|
||||
"training.Error": "{{count}} 組異常",
|
||||
"training.Image mode": "圖片處理",
|
||||
"training.Normal": "正常",
|
||||
"training_mode": "分段模式",
|
||||
"training_ready": "{{count}} 組",
|
||||
"vector_model_max_tokens_tip": "每個分塊資料,最大長度為 3000 tokens",
|
||||
"vllm_model": "圖片理解模型",
|
||||
"vlm_model_required_warning": "需要圖片理解模型",
|
||||
"website_dataset": "網站同步",
|
||||
"website_dataset_desc": "通過爬蟲,批量爬取網頁數據構建知識庫",
|
||||
"website_info": "網站資訊",
|
||||
|
||||
@ -1,9 +1,31 @@
|
||||
{
|
||||
"Action": "請選擇要上傳的圖片",
|
||||
"All images import failed": "所有圖片導入失敗",
|
||||
"Dataset_ID_not_found": "數據集ID不存在",
|
||||
"Failed_to_get_token": "獲取令牌失敗",
|
||||
"Image_ID_copied": "已復制ID",
|
||||
"Image_Preview": "圖片預覽",
|
||||
"Image_dataset_requires_VLM_model_to_be_configured": "圖片數據集需要配置圖片理解模型(VLM)才能使用,請先在模型配置中添加支持圖片理解的模型",
|
||||
"Image_does_not_belong_to_current_team": "圖片不屬於當前團隊",
|
||||
"Image_file_does_not_exist": "圖片不存在",
|
||||
"Loading_image": "加載圖片中...",
|
||||
"Loading_image_failed": "預覽加載失敗",
|
||||
"Only_support_uploading_one_image": "僅支持上傳一張圖片",
|
||||
"image_description_tip": "請輸入圖片的描述內容",
|
||||
"Please select the image to upload": "請選擇要上傳的圖片",
|
||||
"Please select the image to upload select the image to upload": "",
|
||||
"Please wait for all files to upload": "請等待所有文件上傳完成",
|
||||
"bucket_chat": "對話檔案",
|
||||
"bucket_file": "知識庫檔案",
|
||||
"click_to_view_raw_source": "點選檢視原始來源",
|
||||
"dataset_data_input_image_support_format": "支持 .jpg, .jpeg, .png, .gif, .webp 格式",
|
||||
"delete_image": "刪除圖片",
|
||||
"file_name": "檔案名稱",
|
||||
"file_size": "檔案大小",
|
||||
"image": "圖片",
|
||||
"image_collection": "圖片集合",
|
||||
"please_upload_image_first": "請先上傳圖片",
|
||||
"reached_max_file_count": "已達檔案數量上限",
|
||||
"release_the_mouse_to_upload_the_file": "放開滑鼠以上傳檔案",
|
||||
"select_and_drag_file_tip": "點選或拖曳檔案至此處上傳",
|
||||
"select_file_amount_limit": "最多可選擇 {{max}} 個檔案",
|
||||
@ -12,7 +34,9 @@
|
||||
"support_file_type": "支援 {{fileType}} 格式的檔案",
|
||||
"support_max_count": "最多可支援 {{maxCount}} 個檔案",
|
||||
"support_max_size": "單一檔案大小上限為 {{maxSize}}",
|
||||
"total_files": "共{{selectFiles.length}}個文件",
|
||||
"upload_error_description": "單次僅支援上傳多個檔案或一個資料夾",
|
||||
"upload_failed": "上傳失敗",
|
||||
"reached_max_file_count": "已達檔案數量上限",
|
||||
"upload_error_description": "單次僅支援上傳多個檔案或一個資料夾"
|
||||
}
|
||||
"upload_file_error": "請上傳圖片",
|
||||
"uploading": "正在上傳..."
|
||||
}
|
||||
|
||||
@ -3,38 +3,28 @@ import { Skeleton, type ImageProps } from '@chakra-ui/react';
|
||||
import CustomImage from '@fastgpt/web/components/common/Image/MyImage';
|
||||
|
||||
export const MyImage = (props: ImageProps) => {
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const [succeed, setSucceed] = useState(false);
|
||||
|
||||
return (
|
||||
<Skeleton
|
||||
minH="100px"
|
||||
isLoaded={!isLoading}
|
||||
fadeDuration={2}
|
||||
display={'flex'}
|
||||
justifyContent={'center'}
|
||||
my={1}
|
||||
>
|
||||
<CustomImage
|
||||
display={'inline-block'}
|
||||
borderRadius={'md'}
|
||||
alt={''}
|
||||
fallbackSrc={'/imgs/errImg.png'}
|
||||
fallbackStrategy={'onError'}
|
||||
cursor={succeed ? 'pointer' : 'default'}
|
||||
objectFit={'contain'}
|
||||
loading={'lazy'}
|
||||
onLoad={() => {
|
||||
setIsLoading(false);
|
||||
setSucceed(true);
|
||||
}}
|
||||
onError={() => setIsLoading(false)}
|
||||
onClick={() => {
|
||||
if (!succeed) return;
|
||||
window.open(props.src, '_blank');
|
||||
}}
|
||||
{...props}
|
||||
/>
|
||||
</Skeleton>
|
||||
<CustomImage
|
||||
title={'Preview image'}
|
||||
display={'inline-block'}
|
||||
borderRadius={'md'}
|
||||
alt={''}
|
||||
fallbackSrc={'/imgs/errImg.png'}
|
||||
fallbackStrategy={'onError'}
|
||||
cursor={succeed ? 'pointer' : 'default'}
|
||||
objectFit={'contain'}
|
||||
loading={'lazy'}
|
||||
onLoad={() => {
|
||||
setSucceed(true);
|
||||
}}
|
||||
onClick={() => {
|
||||
if (!succeed) return;
|
||||
window.open(props.src, '_blank');
|
||||
}}
|
||||
{...props}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
@ -18,7 +18,7 @@ const NextHead = ({ title, icon, desc }: { title?: string; icon?: string; desc?:
|
||||
name="viewport"
|
||||
content="width=device-width,initial-scale=1.0,maximum-scale=1.0,minimum-scale=1.0,user-scalable=no, viewport-fit=cover"
|
||||
/>
|
||||
<meta httpEquiv="Content-Security-Policy" content="img-src * data:;" />
|
||||
<meta httpEquiv="Content-Security-Policy" content="img-src * data: blob:;" />
|
||||
{desc && <meta name="description" content={desc} />}
|
||||
{icon && <link rel="icon" href={formatIcon} />}
|
||||
</Head>
|
||||
|
||||
@ -240,7 +240,7 @@ const LexiconConfigModal = ({ appId, onClose }: { appId: string; onClose: () =>
|
||||
onSuccess() {
|
||||
setNewData(undefined);
|
||||
},
|
||||
errorToast: t('common:error.Create failed')
|
||||
errorToast: t('common:create_failed')
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
@ -57,11 +57,12 @@ const QuoteList = React.memo(function QuoteList({
|
||||
return {
|
||||
...item,
|
||||
q: currentFilterItem?.q || '',
|
||||
a: currentFilterItem?.a || ''
|
||||
a: currentFilterItem?.a || '',
|
||||
imagePreivewUrl: currentFilterItem?.imagePreivewUrl
|
||||
};
|
||||
}
|
||||
|
||||
return { ...item, q: item.q || '', a: item.a || '' };
|
||||
return { ...item, q: item.q || '' };
|
||||
});
|
||||
|
||||
return processedData.sort((a, b) => {
|
||||
@ -87,6 +88,7 @@ const QuoteList = React.memo(function QuoteList({
|
||||
<QuoteItem
|
||||
quoteItem={item}
|
||||
canViewSource={showRawSource}
|
||||
canEditData={showRouteToDatasetDetail}
|
||||
canEditDataset={showRouteToDatasetDetail}
|
||||
{...RawSourceBoxProps}
|
||||
/>
|
||||
|
||||
@ -81,7 +81,9 @@ const ResponseTags = ({
|
||||
.map((item) => ({
|
||||
sourceName: item.sourceName,
|
||||
sourceId: item.sourceId,
|
||||
icon: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }),
|
||||
icon: item.imageId
|
||||
? 'core/dataset/imageFill'
|
||||
: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }),
|
||||
collectionId: item.collectionId,
|
||||
datasetId: item.datasetId
|
||||
}));
|
||||
|
||||
@ -300,7 +300,7 @@ export const WholeResponseContent = ({
|
||||
<Row label={t('chat:query_extension_result')} value={`${activeModule?.extensionResult}`} />
|
||||
{activeModule.quoteList && activeModule.quoteList.length > 0 && (
|
||||
<Row
|
||||
label={t('common:core.chat.response.module quoteList')}
|
||||
label={t('chat:search_results')}
|
||||
rawDom={<QuoteList chatItemDataId={dataId} rawSearch={activeModule.quoteList} />}
|
||||
/>
|
||||
)}
|
||||
|
||||
@ -8,7 +8,11 @@ import { useTranslation } from 'next-i18next';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import dynamic from 'next/dynamic';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import { SearchScoreTypeEnum, SearchScoreTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
SearchScoreTypeEnum,
|
||||
SearchScoreTypeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read';
|
||||
import Markdown from '@/components/Markdown';
|
||||
|
||||
@ -88,11 +92,13 @@ export const formatScore = (score: ScoreItemType[]) => {
|
||||
const QuoteItem = ({
|
||||
quoteItem,
|
||||
canViewSource,
|
||||
canEditData,
|
||||
canEditDataset,
|
||||
...RawSourceBoxProps
|
||||
}: {
|
||||
quoteItem: SearchDataResponseItemType;
|
||||
canViewSource?: boolean;
|
||||
canEditData?: boolean;
|
||||
canEditDataset?: boolean;
|
||||
} & Omit<readCollectionSourceBody, 'collectionId'>) => {
|
||||
const { t } = useTranslation();
|
||||
@ -206,7 +212,7 @@ const QuoteItem = ({
|
||||
{...RawSourceBoxProps}
|
||||
/>
|
||||
<Box flex={1} />
|
||||
{quoteItem.id && canEditDataset && (
|
||||
{quoteItem.id && canEditData && (
|
||||
<MyTooltip label={t('common:core.dataset.data.Edit')}>
|
||||
<Box
|
||||
className="hover-data"
|
||||
@ -238,12 +244,13 @@ const QuoteItem = ({
|
||||
<Link
|
||||
as={NextLink}
|
||||
className="hover-data"
|
||||
visibility={'hidden'}
|
||||
display={'flex'}
|
||||
alignItems={'center'}
|
||||
visibility={'hidden'}
|
||||
color={'primary.500'}
|
||||
href={`/dataset/detail?datasetId=${quoteItem.datasetId}&currentTab=dataCard&collectionId=${quoteItem.collectionId}`}
|
||||
>
|
||||
{t('chat:to_dataset')}
|
||||
{t('common:to_dataset')}
|
||||
<MyIcon name={'common/rightArrowLight'} w={'10px'} />
|
||||
</Link>
|
||||
)}
|
||||
|
||||
@ -3,20 +3,22 @@ import { Box, type BoxProps } from '@chakra-ui/react';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
|
||||
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import type { readCollectionSourceBody } from '@/pages/api/core/dataset/collection/read';
|
||||
import type { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
type Props = BoxProps &
|
||||
readCollectionSourceBody & {
|
||||
collectionType?: DatasetCollectionTypeEnum;
|
||||
sourceName?: string;
|
||||
collectionId: string;
|
||||
sourceId?: string;
|
||||
canView?: boolean;
|
||||
};
|
||||
|
||||
const RawSourceBox = ({
|
||||
sourceId,
|
||||
collectionType,
|
||||
sourceName = '',
|
||||
canView = true,
|
||||
|
||||
@ -35,7 +37,10 @@ const RawSourceBox = ({
|
||||
|
||||
const canPreview = !!sourceId && canView;
|
||||
|
||||
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
|
||||
const icon = useMemo(
|
||||
() => getCollectionIcon({ type: collectionType, sourceId, name: sourceName }),
|
||||
[collectionType, sourceId, sourceName]
|
||||
);
|
||||
const read = getCollectionSourceAndOpen({
|
||||
collectionId,
|
||||
appId,
|
||||
|
||||
@ -34,9 +34,11 @@ export type DatasetDataListItemType = {
|
||||
_id: string;
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
q: string; // embedding content
|
||||
a: string; // bonus content
|
||||
q?: string;
|
||||
a?: string;
|
||||
imageId?: string;
|
||||
imageSize?: number;
|
||||
imagePreviewUrl?: string; //image preview url
|
||||
chunkIndex?: number;
|
||||
updated?: boolean;
|
||||
// indexes: DatasetDataSchemaType['indexes'];
|
||||
};
|
||||
|
||||
@ -50,7 +50,7 @@ const BackupImportModal = ({
|
||||
maxCount={1}
|
||||
fileType="csv"
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
setSelectFiles={(e) => setSelectFiles(e)}
|
||||
/>
|
||||
{/* File render */}
|
||||
{selectFiles.length > 0 && (
|
||||
|
||||
@ -248,6 +248,26 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
|
||||
});
|
||||
}
|
||||
},
|
||||
...(feConfigs?.isPlus
|
||||
? [
|
||||
{
|
||||
label: (
|
||||
<Flex>
|
||||
<MyIcon name={'image'} mr={2} w={'20px'} />
|
||||
{t('dataset:core.dataset.Image collection')}
|
||||
</Flex>
|
||||
),
|
||||
onClick: () =>
|
||||
router.replace({
|
||||
query: {
|
||||
...router.query,
|
||||
currentTab: TabEnum.import,
|
||||
source: ImportDataSourceEnum.imageDataset
|
||||
}
|
||||
})
|
||||
}
|
||||
]
|
||||
: []),
|
||||
{
|
||||
label: (
|
||||
<Flex>
|
||||
@ -473,7 +493,10 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
|
||||
name={editFolderData.name}
|
||||
/>
|
||||
)}
|
||||
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
|
||||
<EditCreateVirtualFileModal
|
||||
iconSrc={'modal/manualDataset'}
|
||||
closeBtnText={t('common:Cancel')}
|
||||
/>
|
||||
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
|
||||
{isOpenBackupImportModal && (
|
||||
<BackupImportModal
|
||||
|
||||
@ -421,7 +421,7 @@ const AddTagToCollections = ({
|
||||
() =>
|
||||
collectionsList.map((item) => {
|
||||
const collection = item.data;
|
||||
const icon = getCollectionIcon(collection.type, collection.name);
|
||||
const icon = getCollectionIcon({ type: collection.type, name: collection.name });
|
||||
return {
|
||||
id: collection._id,
|
||||
tags: collection.tags,
|
||||
|
||||
@ -35,6 +35,8 @@ import { useForm } from 'react-hook-form';
|
||||
import type { getTrainingDetailResponse } from '@/pages/api/core/dataset/collection/trainingDetail';
|
||||
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
|
||||
import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
|
||||
import MyImage from '@/components/MyImage';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
|
||||
enum TrainingStatus {
|
||||
NotStart = 'NotStart',
|
||||
@ -48,6 +50,8 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
const { t } = useTranslation();
|
||||
|
||||
const isQA = trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.qa;
|
||||
const isImageParse =
|
||||
trainingDetail?.trainingType === DatasetCollectionDataProcessModeEnum.imageParse;
|
||||
|
||||
/*
|
||||
状态计算
|
||||
@ -102,6 +106,18 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
status: TrainingStatus.Ready,
|
||||
errorCount: 0
|
||||
},
|
||||
...(isImageParse
|
||||
? [
|
||||
{
|
||||
errorCount: trainingDetail.errorCounts.imageParse,
|
||||
label: t(TrainingProcess.parseImage.label),
|
||||
statusText: getStatusText(TrainingModeEnum.imageParse),
|
||||
status: getTrainingStatus({
|
||||
errorCount: trainingDetail.errorCounts.imageParse
|
||||
})
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(isQA
|
||||
? [
|
||||
{
|
||||
@ -114,7 +130,7 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(trainingDetail?.advancedTraining.imageIndex && !isQA
|
||||
...(trainingDetail?.advancedTraining.imageIndex
|
||||
? [
|
||||
{
|
||||
errorCount: trainingDetail.errorCounts.image,
|
||||
@ -126,7 +142,7 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(trainingDetail?.advancedTraining.autoIndexes && !isQA
|
||||
...(trainingDetail?.advancedTraining.autoIndexes
|
||||
? [
|
||||
{
|
||||
errorCount: trainingDetail.errorCounts.auto,
|
||||
@ -159,7 +175,17 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
];
|
||||
|
||||
return states;
|
||||
}, [trainingDetail, t, isQA]);
|
||||
}, [
|
||||
trainingDetail.queuedCounts,
|
||||
trainingDetail.trainingCounts,
|
||||
trainingDetail.errorCounts,
|
||||
trainingDetail?.advancedTraining.imageIndex,
|
||||
trainingDetail?.advancedTraining.autoIndexes,
|
||||
trainingDetail.trainedCount,
|
||||
t,
|
||||
isImageParse,
|
||||
isQA
|
||||
]);
|
||||
|
||||
return (
|
||||
<Flex flexDirection={'column'} gap={6}>
|
||||
@ -254,11 +280,20 @@ const ProgressView = ({ trainingDetail }: { trainingDetail: getTrainingDetailRes
|
||||
);
|
||||
};
|
||||
|
||||
const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionId: string }) => {
|
||||
const ErrorView = ({
|
||||
datasetId,
|
||||
collectionId,
|
||||
refreshTrainingDetail
|
||||
}: {
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
refreshTrainingDetail: () => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const TrainingText = {
|
||||
[TrainingModeEnum.chunk]: t('dataset:process.Vectorizing'),
|
||||
[TrainingModeEnum.qa]: t('dataset:process.Get QA'),
|
||||
[TrainingModeEnum.imageParse]: t('dataset:process.Image_Index'),
|
||||
[TrainingModeEnum.image]: t('dataset:process.Image_Index'),
|
||||
[TrainingModeEnum.auto]: t('dataset:process.Auto_Index')
|
||||
};
|
||||
@ -308,6 +343,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
|
||||
manual: true,
|
||||
onSuccess: () => {
|
||||
refreshList();
|
||||
refreshTrainingDetail();
|
||||
setEditChunk(undefined);
|
||||
}
|
||||
}
|
||||
@ -316,6 +352,7 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
|
||||
if (editChunk) {
|
||||
return (
|
||||
<EditView
|
||||
loading={updateLoading}
|
||||
editChunk={editChunk}
|
||||
onCancel={() => setEditChunk(undefined)}
|
||||
onSave={(data) => {
|
||||
@ -401,10 +438,12 @@ const ErrorView = ({ datasetId, collectionId }: { datasetId: string; collectionI
|
||||
};
|
||||
|
||||
const EditView = ({
|
||||
loading,
|
||||
editChunk,
|
||||
onCancel,
|
||||
onSave
|
||||
}: {
|
||||
loading: boolean;
|
||||
editChunk: getTrainingDataDetailResponse;
|
||||
onCancel: () => void;
|
||||
onSave: (data: { q: string; a?: string }) => void;
|
||||
@ -419,20 +458,41 @@ const EditView = ({
|
||||
|
||||
return (
|
||||
<Flex flexDirection={'column'} gap={4}>
|
||||
{editChunk?.a && <Box>q</Box>}
|
||||
<MyTextarea {...register('q')} minH={editChunk?.a ? 200 : 400} />
|
||||
{editChunk?.imagePreviewUrl && (
|
||||
<Box>
|
||||
<FormLabel>{t('file:image')}</FormLabel>
|
||||
<Box w={'100%'} h={'200px'} border={'base'} borderRadius={'md'}>
|
||||
<MyImage src={editChunk.imagePreviewUrl} alt="image" w={'100%'} h={'100%'} />
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Box>
|
||||
{(editChunk?.a || editChunk?.imagePreviewUrl) && (
|
||||
<FormLabel>
|
||||
{editChunk?.a
|
||||
? t('common:dataset_data_input_chunk_content')
|
||||
: t('common:dataset_data_input_q')}
|
||||
</FormLabel>
|
||||
)}
|
||||
<MyTextarea
|
||||
{...register('q', { required: true })}
|
||||
minH={editChunk?.a || editChunk?.imagePreviewUrl ? 200 : 400}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
{editChunk?.a && (
|
||||
<>
|
||||
<Box>a</Box>
|
||||
<Box>
|
||||
<Box>{t('common:dataset_data_input_a')}</Box>
|
||||
<MyTextarea {...register('a')} minH={200} />
|
||||
</>
|
||||
</Box>
|
||||
)}
|
||||
<Flex justifyContent={'flex-end'} gap={4}>
|
||||
<Button variant={'outline'} onClick={onCancel}>
|
||||
{t('common:Cancel')}
|
||||
</Button>
|
||||
<Button variant={'primary'} onClick={handleSubmit(onSave)}>
|
||||
{t('dataset:dataset.ReTrain')}
|
||||
<Button isLoading={loading} variant={'primary'} onClick={handleSubmit(onSave)}>
|
||||
{t('common:Confirm')}
|
||||
</Button>
|
||||
</Flex>
|
||||
</Flex>
|
||||
@ -453,14 +513,15 @@ const TrainingStates = ({
|
||||
const { t } = useTranslation();
|
||||
const [tab, setTab] = useState<typeof defaultTab>(defaultTab);
|
||||
|
||||
const { data: trainingDetail, loading } = useRequest2(
|
||||
() => getDatasetCollectionTrainingDetail(collectionId),
|
||||
{
|
||||
pollingInterval: 5000,
|
||||
pollingWhenHidden: false,
|
||||
manual: false
|
||||
}
|
||||
);
|
||||
const {
|
||||
data: trainingDetail,
|
||||
loading,
|
||||
runAsync: refreshTrainingDetail
|
||||
} = useRequest2(() => getDatasetCollectionTrainingDetail(collectionId), {
|
||||
pollingInterval: 5000,
|
||||
pollingWhenHidden: false,
|
||||
manual: false
|
||||
});
|
||||
|
||||
const errorCounts = (Object.values(trainingDetail?.errorCounts || {}) as number[]).reduce(
|
||||
(acc, count) => acc + count,
|
||||
@ -493,7 +554,13 @@ const TrainingStates = ({
|
||||
]}
|
||||
/>
|
||||
{tab === 'states' && trainingDetail && <ProgressView trainingDetail={trainingDetail} />}
|
||||
{tab === 'errors' && <ErrorView datasetId={datasetId} collectionId={collectionId} />}
|
||||
{tab === 'errors' && (
|
||||
<ErrorView
|
||||
datasetId={datasetId}
|
||||
collectionId={collectionId}
|
||||
refreshTrainingDetail={refreshTrainingDetail}
|
||||
/>
|
||||
)}
|
||||
</ModalBody>
|
||||
</MyModal>
|
||||
);
|
||||
|
||||
@ -75,7 +75,7 @@ const CollectionCard = () => {
|
||||
const formatCollections = useMemo(
|
||||
() =>
|
||||
collections.map((collection) => {
|
||||
const icon = getCollectionIcon(collection.type, collection.name);
|
||||
const icon = getCollectionIcon({ type: collection.type, name: collection.name });
|
||||
const status = (() => {
|
||||
if (collection.hasError) {
|
||||
return {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import React, { useState, useMemo } from 'react';
|
||||
import { Box, Card, IconButton, Flex, Button, useTheme } from '@chakra-ui/react';
|
||||
import { Box, Card, IconButton, Flex, Button, useTheme, Image } from '@chakra-ui/react';
|
||||
import {
|
||||
getDatasetDataList,
|
||||
delOneDatasetDataById,
|
||||
@ -24,28 +24,36 @@ import TagsPopOver from './CollectionCard/TagsPopOver';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyDivider from '@fastgpt/web/components/common/MyDivider';
|
||||
import Markdown from '@/components/Markdown';
|
||||
import { useMemoizedFn } from 'ahooks';
|
||||
import { useBoolean, useMemoizedFn } from 'ahooks';
|
||||
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
|
||||
import { TabEnum } from './NavBar';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
ImportDataSourceEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import TrainingStates from './CollectionCard/TrainingStates';
|
||||
import { getTextValidLength } from '@fastgpt/global/common/string/utils';
|
||||
import PopoverConfirm from '@fastgpt/web/components/common/MyPopover/PopoverConfirm';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
|
||||
import dynamic from 'next/dynamic';
|
||||
|
||||
const InsertImagesModal = dynamic(() => import('./data/InsertImageModal'), {
|
||||
ssr: false
|
||||
});
|
||||
|
||||
const DataCard = () => {
|
||||
const theme = useTheme();
|
||||
const router = useRouter();
|
||||
const { isPc } = useSystem();
|
||||
const { collectionId = '', datasetId } = router.query as {
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const { collectionId = '' } = router.query as {
|
||||
collectionId: string;
|
||||
datasetId: string;
|
||||
};
|
||||
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
|
||||
const { feConfigs } = useSystemStore();
|
||||
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
|
||||
|
||||
const { t } = useTranslation();
|
||||
const [searchText, setSearchText] = useState('');
|
||||
@ -78,21 +86,30 @@ const DataCard = () => {
|
||||
|
||||
const [editDataId, setEditDataId] = useState<string>();
|
||||
|
||||
// get file info
|
||||
const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
|
||||
refreshDeps: [collectionId],
|
||||
manual: false,
|
||||
onError: () => {
|
||||
router.replace({
|
||||
query: {
|
||||
datasetId
|
||||
}
|
||||
});
|
||||
// Get collection info
|
||||
const { data: collection, runAsync: reloadCollection } = useRequest2(
|
||||
() => getDatasetCollectionById(collectionId),
|
||||
{
|
||||
refreshDeps: [collectionId],
|
||||
manual: false,
|
||||
onError: () => {
|
||||
router.replace({
|
||||
query: {
|
||||
datasetId
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
);
|
||||
|
||||
const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
|
||||
|
||||
const [
|
||||
isInsertImagesModalOpen,
|
||||
{ setTrue: openInsertImagesModal, setFalse: closeInsertImagesModal }
|
||||
] = useBoolean();
|
||||
const isImageCollection = collection?.type === DatasetCollectionTypeEnum.images;
|
||||
|
||||
const onDeleteOneData = useMemoizedFn(async (dataId: string) => {
|
||||
try {
|
||||
await delOneDatasetDataById(dataId);
|
||||
@ -125,6 +142,7 @@ const DataCard = () => {
|
||||
>
|
||||
{collection?._id && (
|
||||
<RawSourceBox
|
||||
collectionType={collection.type}
|
||||
collectionId={collection._id}
|
||||
{...getCollectionSourceData(collection)}
|
||||
fontSize={['sm', 'md']}
|
||||
@ -158,7 +176,7 @@ const DataCard = () => {
|
||||
{t('dataset:retain_collection')}
|
||||
</Button>
|
||||
)}
|
||||
{canWrite && (
|
||||
{canWrite && !isImageCollection && (
|
||||
<Button
|
||||
ml={2}
|
||||
variant={'whitePrimary'}
|
||||
@ -171,6 +189,17 @@ const DataCard = () => {
|
||||
{t('common:dataset.Insert Data')}
|
||||
</Button>
|
||||
)}
|
||||
{canWrite && isImageCollection && (
|
||||
<Button
|
||||
ml={2}
|
||||
variant={'whitePrimary'}
|
||||
size={['sm', 'md']}
|
||||
isDisabled={!collection}
|
||||
onClick={openInsertImagesModal}
|
||||
>
|
||||
{t('dataset:insert_images')}
|
||||
</Button>
|
||||
)}
|
||||
</Flex>
|
||||
<Box justifyContent={'center'} px={6} pos={'relative'} w={'100%'}>
|
||||
<MyDivider my={'17px'} w={'100%'} />
|
||||
@ -236,7 +265,7 @@ const DataCard = () => {
|
||||
userSelect={'none'}
|
||||
boxShadow={'none'}
|
||||
bg={index % 2 === 1 ? 'myGray.50' : 'blue.50'}
|
||||
border={theme.borders.sm}
|
||||
border={'sm'}
|
||||
position={'relative'}
|
||||
overflow={'hidden'}
|
||||
_hover={{
|
||||
@ -282,17 +311,35 @@ const DataCard = () => {
|
||||
</Flex>
|
||||
|
||||
{/* Data content */}
|
||||
<Box wordBreak={'break-all'} fontSize={'sm'}>
|
||||
<Markdown source={item.q} isDisabled />
|
||||
{!!item.a && (
|
||||
<>
|
||||
<MyDivider />
|
||||
<Markdown source={item.a} isDisabled />
|
||||
</>
|
||||
)}
|
||||
</Box>
|
||||
{item.imagePreviewUrl ? (
|
||||
<Box display={['block', 'flex']} alignItems={'center'} gap={[3, 6]}>
|
||||
<Box flex="1 0 0">
|
||||
<MyImage
|
||||
src={item.imagePreviewUrl}
|
||||
alt={''}
|
||||
w={'100%'}
|
||||
h="100%"
|
||||
maxH={'300px'}
|
||||
objectFit="contain"
|
||||
/>
|
||||
</Box>
|
||||
<Box flex="1 0 0" maxH={'300px'} overflow={'hidden'} fontSize="sm">
|
||||
<Markdown source={item.q} isDisabled />
|
||||
</Box>
|
||||
</Box>
|
||||
) : (
|
||||
<Box wordBreak={'break-all'} fontSize={'sm'}>
|
||||
<Markdown source={item.q} isDisabled />
|
||||
{!!item.a && (
|
||||
<>
|
||||
<MyDivider />
|
||||
<Markdown source={item.a} isDisabled />
|
||||
</>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{/* Mask */}
|
||||
{/* Footer */}
|
||||
<Flex
|
||||
className="footer"
|
||||
position={'absolute'}
|
||||
@ -317,17 +364,23 @@ const DataCard = () => {
|
||||
py={1}
|
||||
mr={2}
|
||||
>
|
||||
<MyIcon
|
||||
bg={'white'}
|
||||
color={'myGray.600'}
|
||||
borderRadius={'sm'}
|
||||
border={'1px'}
|
||||
borderColor={'myGray.200'}
|
||||
name="common/text/t"
|
||||
w={'14px'}
|
||||
mr={1}
|
||||
/>
|
||||
{getTextValidLength(item.q + item.a || '')}
|
||||
{item.imageSize ? (
|
||||
<>{formatFileSize(item.imageSize)}</>
|
||||
) : (
|
||||
<>
|
||||
<MyIcon
|
||||
bg={'white'}
|
||||
color={'myGray.600'}
|
||||
borderRadius={'sm'}
|
||||
border={'1px'}
|
||||
borderColor={'myGray.200'}
|
||||
name="common/text/t"
|
||||
w={'14px'}
|
||||
mr={1}
|
||||
/>
|
||||
{getTextValidLength((item?.q || '') + (item?.a || ''))}
|
||||
</>
|
||||
)}
|
||||
</Flex>
|
||||
{canWrite && (
|
||||
<PopoverConfirm
|
||||
@ -362,7 +415,7 @@ const DataCard = () => {
|
||||
collectionId={collection._id}
|
||||
dataId={editDataId}
|
||||
onClose={() => setEditDataId(undefined)}
|
||||
onSuccess={(data) => {
|
||||
onSuccess={(data: any) => {
|
||||
if (editDataId === '') {
|
||||
refreshList();
|
||||
return;
|
||||
@ -386,9 +439,16 @@ const DataCard = () => {
|
||||
datasetId={datasetId}
|
||||
defaultTab={'errors'}
|
||||
collectionId={errorModalId}
|
||||
onClose={() => setErrorModalId('')}
|
||||
onClose={() => {
|
||||
setErrorModalId('');
|
||||
refreshList();
|
||||
reloadCollection();
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
{isInsertImagesModalOpen && (
|
||||
<InsertImagesModal collectionId={collectionId} onClose={closeInsertImagesModal} />
|
||||
)}
|
||||
</MyBox>
|
||||
);
|
||||
};
|
||||
|
||||
@ -173,6 +173,20 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
],
|
||||
[ImportDataSourceEnum.imageDataset]: [
|
||||
{
|
||||
title: t('dataset:import_select_file')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_param_setting')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_data_preview')
|
||||
},
|
||||
{
|
||||
title: t('dataset:import_confirm')
|
||||
}
|
||||
]
|
||||
};
|
||||
const steps = modeSteps[source];
|
||||
@ -238,20 +252,22 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
|
||||
<Box flex={1} />
|
||||
</Flex>
|
||||
{/* step */}
|
||||
<Box
|
||||
mt={4}
|
||||
mb={5}
|
||||
px={3}
|
||||
py={[2, 4]}
|
||||
bg={'myGray.50'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
borderRadius={'md'}
|
||||
>
|
||||
<Box maxW={['100%', '900px']} mx={'auto'}>
|
||||
<MyStep />
|
||||
{source !== ImportDataSourceEnum.imageDataset && (
|
||||
<Box
|
||||
mt={4}
|
||||
mb={5}
|
||||
px={3}
|
||||
py={[2, 4]}
|
||||
bg={'myGray.50'}
|
||||
borderWidth={'1px'}
|
||||
borderColor={'borderColor.low'}
|
||||
borderRadius={'md'}
|
||||
>
|
||||
<Box maxW={['100%', '900px']} mx={'auto'}>
|
||||
<MyStep />
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
{children}
|
||||
</DatasetImportContext.Provider>
|
||||
);
|
||||
|
||||
@ -7,15 +7,8 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import React, { type DragEvent, useCallback, useMemo, useState } from 'react';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { uploadFile2DB } from '@/web/common/file/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import type { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
export type SelectFileItemType = {
|
||||
fileId: string;
|
||||
@ -26,23 +19,18 @@ export type SelectFileItemType = {
|
||||
const FileSelector = ({
|
||||
fileType,
|
||||
selectFiles,
|
||||
setSelectFiles,
|
||||
onStartSelect,
|
||||
onFinishSelect,
|
||||
onSelectFiles,
|
||||
...props
|
||||
}: {
|
||||
fileType: string;
|
||||
selectFiles: ImportSourceItemType[];
|
||||
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
|
||||
onStartSelect: () => void;
|
||||
onFinishSelect: () => void;
|
||||
onSelectFiles: (e: SelectFileItemType[]) => any;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { toast } = useToast();
|
||||
const { feConfigs } = useSystemStore();
|
||||
|
||||
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
|
||||
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
|
||||
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
|
||||
|
||||
@ -65,90 +53,6 @@ const FileSelector = ({
|
||||
'i'
|
||||
);
|
||||
|
||||
const { runAsync: onSelectFile, loading: isLoading } = useRequest2(
|
||||
async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
await Promise.all(
|
||||
files.map(async ({ fileId, file }) => {
|
||||
try {
|
||||
const { fileId: uploadFileId } = await uploadFile2DB({
|
||||
file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
data: {
|
||||
datasetId
|
||||
},
|
||||
percentListen: (e) => {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
uploadedFileRate: item.uploadedFileRate
|
||||
? Math.max(e, item.uploadedFileRate)
|
||||
: e
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
dbFileId: uploadFileId,
|
||||
isUploading: false,
|
||||
uploadedFileRate: 100
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
} catch (error) {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
isUploading: false,
|
||||
errorMsg: getErrText(error)
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
{
|
||||
onBefore([files]) {
|
||||
onStartSelect();
|
||||
setSelectFiles((state) => {
|
||||
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
|
||||
const { fileId, file } = selectFile;
|
||||
|
||||
return {
|
||||
id: fileId,
|
||||
createStatus: 'waiting',
|
||||
file,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
isUploading: true,
|
||||
uploadedFileRate: 0
|
||||
};
|
||||
});
|
||||
const results = formatFiles.concat(state).slice(0, maxCount);
|
||||
return results;
|
||||
});
|
||||
},
|
||||
onFinally() {
|
||||
onFinishSelect();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
const selectFileCallback = useCallback(
|
||||
(files: SelectFileItemType[]) => {
|
||||
if (selectFiles.length + files.length > maxCount) {
|
||||
@ -160,7 +64,7 @@ const FileSelector = ({
|
||||
}
|
||||
// size check
|
||||
if (!maxSize) {
|
||||
return onSelectFile(files);
|
||||
return onSelectFiles(files);
|
||||
}
|
||||
const filterFiles = files.filter((item) => item.file.size <= maxSize);
|
||||
|
||||
@ -171,9 +75,9 @@ const FileSelector = ({
|
||||
});
|
||||
}
|
||||
|
||||
return onSelectFile(filterFiles);
|
||||
return onSelectFiles(filterFiles);
|
||||
},
|
||||
[t, maxCount, maxSize, onSelectFile, selectFiles.length, toast]
|
||||
[t, maxCount, maxSize, onSelectFiles, selectFiles.length, toast]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
@ -278,7 +182,6 @@ const FileSelector = ({
|
||||
|
||||
return (
|
||||
<MyBox
|
||||
isLoading={isLoading}
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
alignItems={'center'}
|
||||
|
||||
@ -71,7 +71,7 @@ const CustomTextInput = () => {
|
||||
<Box maxW={['100%', '800px']}>
|
||||
<Box display={['block', 'flex']} alignItems={'center'}>
|
||||
<Box flex={'0 0 120px'} fontSize={'sm'}>
|
||||
{t('common:core.dataset.collection.Collection name')}
|
||||
{t('dataset:collection_name')}
|
||||
</Box>
|
||||
<Input
|
||||
flex={'1 0 0'}
|
||||
@ -79,7 +79,7 @@ const CustomTextInput = () => {
|
||||
{...register('name', {
|
||||
required: true
|
||||
})}
|
||||
placeholder={t('common:core.dataset.collection.Collection name')}
|
||||
placeholder={t('dataset:collection_name')}
|
||||
bg={'myGray.50'}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
@ -1,14 +1,20 @@
|
||||
import React, { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { type ImportSourceItemType } from '@/web/core/dataset/type.d';
|
||||
import { Box, Button } from '@chakra-ui/react';
|
||||
import FileSelector from '../components/FileSelector';
|
||||
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
|
||||
import dynamic from 'next/dynamic';
|
||||
import Loading from '@fastgpt/web/components/common/MyLoading';
|
||||
import { RenderUploadFiles } from '../components/RenderFiles';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { uploadFile2DB } from '@/web/common/file/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import { getFileIcon } from '@fastgpt/global/common/file/icon';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
|
||||
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'));
|
||||
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
|
||||
@ -33,14 +39,16 @@ export default React.memo(FileLocal);
|
||||
|
||||
const SelectFile = React.memo(function SelectFile() {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
|
||||
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
|
||||
|
||||
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
|
||||
sources.map((source) => ({
|
||||
isUploading: false,
|
||||
...source
|
||||
}))
|
||||
);
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
|
||||
|
||||
useEffect(() => {
|
||||
@ -53,15 +61,90 @@ const SelectFile = React.memo(function SelectFile() {
|
||||
goToNext();
|
||||
}, [goToNext]);
|
||||
|
||||
const { runAsync: onSelectFiles, loading: uploading } = useRequest2(
|
||||
async (files: SelectFileItemType[]) => {
|
||||
{
|
||||
await Promise.all(
|
||||
files.map(async ({ fileId, file }) => {
|
||||
try {
|
||||
const { fileId: uploadFileId } = await uploadFile2DB({
|
||||
file,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
data: {
|
||||
datasetId
|
||||
},
|
||||
percentListen: (e) => {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
uploadedFileRate: item.uploadedFileRate
|
||||
? Math.max(e, item.uploadedFileRate)
|
||||
: e
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
dbFileId: uploadFileId,
|
||||
isUploading: false,
|
||||
uploadedFileRate: 100
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
} catch (error) {
|
||||
setSelectFiles((state) =>
|
||||
state.map((item) =>
|
||||
item.id === fileId
|
||||
? {
|
||||
...item,
|
||||
isUploading: false,
|
||||
errorMsg: getErrText(error)
|
||||
}
|
||||
: item
|
||||
)
|
||||
);
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
{
|
||||
onBefore([files]) {
|
||||
setSelectFiles((state) => {
|
||||
return [
|
||||
...state,
|
||||
...files.map<ImportSourceItemType>((selectFile) => {
|
||||
const { fileId, file } = selectFile;
|
||||
|
||||
return {
|
||||
id: fileId,
|
||||
createStatus: 'waiting',
|
||||
file,
|
||||
sourceName: file.name,
|
||||
sourceSize: formatFileSize(file.size),
|
||||
icon: getFileIcon(file.name),
|
||||
isUploading: true,
|
||||
uploadedFileRate: 0
|
||||
};
|
||||
})
|
||||
];
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<FileSelector
|
||||
fileType={fileType}
|
||||
selectFiles={selectFiles}
|
||||
setSelectFiles={setSelectFiles}
|
||||
onStartSelect={() => setUploading(true)}
|
||||
onFinishSelect={() => setUploading(false)}
|
||||
/>
|
||||
<FileSelector fileType={fileType} selectFiles={selectFiles} onSelectFiles={onSelectFiles} />
|
||||
|
||||
{/* render files */}
|
||||
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
|
||||
|
||||
@ -0,0 +1,189 @@
|
||||
import React, { useState } from 'react';
|
||||
import { Box, Button, Flex, Input, Image } from '@chakra-ui/react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { useRouter } from 'next/router';
|
||||
import { TabEnum } from '../../NavBar';
|
||||
import { createImageDatasetCollection } from '@/web/core/dataset/image/api';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
|
||||
import type { ImportSourceItemType } from '@/web/core/dataset/type';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { useContextSelector } from 'use-context-selector';
|
||||
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
|
||||
import { DatasetImportContext } from '../Context';
|
||||
import MyImage from '@fastgpt/web/components/common/Image/MyImage';
|
||||
|
||||
// Image file extensions offered by the file selector below (passed as its `fileType` prop).
const fileType = '.jpg, .jpeg, .png';
|
||||
|
||||
/**
 * Entry component of the image-dataset import source.
 * It renders the single-step image selection / collection creation form.
 */
const ImageDataset = () => <SelectFile />;
|
||||
|
||||
export default React.memo(ImageDataset);
|
||||
|
||||
/**
 * Form used to create an image collection.
 *
 * The user enters a collection name, picks image files (shown as thumbnails that can be
 * removed individually) and submits. On submit the files are sent through
 * `createImageDatasetCollection`; on success the router switches to the collection list tab.
 *
 * Thumbnails are backed by blob URLs created with `URL.createObjectURL`. Each URL is revoked
 * when its file is removed and all remaining ones are revoked when the component unmounts.
 */
const SelectFile = React.memo(function SelectFile() {
  const { t } = useTranslation();
  const router = useRouter();

  const parentId = useContextSelector(DatasetImportContext, (v) => v.parentId);
  const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);

  const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>([]);
  const [uploadProgress, setUploadProgress] = useState(0);

  // Always points at the latest list so the unmount cleanup sees the current blob URLs.
  const selectFilesRef = React.useRef<ImportSourceItemType[]>(selectFiles);
  selectFilesRef.current = selectFiles;

  // Release every preview blob URL that is still alive when the form goes away.
  React.useEffect(() => {
    return () => {
      selectFilesRef.current.forEach((item) => {
        if (item.icon) URL.revokeObjectURL(item.icon);
      });
    };
  }, []);

  const { register, handleSubmit } = useForm({
    defaultValues: {
      name: ''
    }
  });

  /**
   * Append newly picked files to the list.
   * Ids and blob URLs are created here, outside the state updater: updaters must stay pure
   * because React may invoke them more than once, which would leak blob URLs.
   */
  const onSelectFiles = (files: SelectFileItemType[]) => {
    const formatFiles = Array.from(files).map<ImportSourceItemType>((item) => ({
      id: getNanoid(),
      createStatus: 'waiting',
      file: item.file,
      sourceName: item.file.name,
      icon: URL.createObjectURL(item.file)
    }));

    setSelectFiles((pre) => [...pre, ...formatFiles]);
  };

  /** Remove the file rendered at `index` and release its preview blob URL. */
  const onRemoveFile = (index: number) => {
    const target = selectFiles[index];
    if (!target) return;

    if (target.icon) URL.revokeObjectURL(target.icon);
    setSelectFiles((prev) => prev.filter((item) => item.id !== target.id));
  };

  const { runAsync: onCreate, loading: creating } = useRequest2(
    async ({ name: collectionName }: { name: string }) => {
      // Start every attempt from 0 so a retry does not show the progress of a failed run.
      setUploadProgress(0);

      return await createImageDatasetCollection({
        parentId,
        datasetId,
        collectionName,
        // Only entries that actually carry a File are uploaded.
        files: selectFiles.flatMap((item) => (item.file ? [item.file] : [])),
        onUploadProgress: setUploadProgress
      });
    },
    {
      manual: true,
      successToast: t('common:create_success'),
      onSuccess() {
        router.replace({
          query: {
            datasetId: router.query.datasetId,
            currentTab: TabEnum.collectionCard
          }
        });
      }
    }
  );

  return (
    <Flex flexDirection={'column'} maxW={'850px'} mx={'auto'} mt={7}>
      {/* Collection name */}
      <Flex alignItems="center" width="100%">
        <FormLabel required width={['100px', '140px']}>
          {t('dataset:collection_name')}
        </FormLabel>

        <Input
          flex="0 0 400px"
          bg="myGray.50"
          placeholder={t('dataset:collection_name')}
          {...register('name', { required: true })}
        />
      </Flex>

      {/* File picker and thumbnails */}
      <Flex mt={7} alignItems="flex-start" width="100%">
        <FormLabel required width={['100px', '140px']}>
          {t('common:core.dataset.collection.Collection raw text')}
        </FormLabel>

        <Box flex={'1 0 0'}>
          <Box>
            <FileSelector
              fileType={fileType}
              selectFiles={selectFiles}
              onSelectFiles={onSelectFiles}
            />
          </Box>
          {selectFiles.length > 0 && (
            <Flex flexWrap={'wrap'} gap={4} mt={3} width="100%">
              {selectFiles.map((file, index) => (
                <Box
                  // Stable id instead of the index: items can be removed from the middle.
                  key={file.id}
                  w="100px"
                  h={'100px'}
                  position={'relative'}
                  _hover={{
                    '.close-icon': { display: 'block' }
                  }}
                  bg={'myGray.50'}
                  borderRadius={'md'}
                  border={'base'}
                  borderStyle={'dashed'}
                  p={1}
                >
                  <MyImage
                    src={file.icon}
                    w="100%"
                    h={'100%'}
                    objectFit={'contain'}
                    alt={file.sourceName}
                  />
                  <MyIcon
                    name={'closeSolid'}
                    w={'1rem'}
                    h={'1rem'}
                    color={'myGray.700'}
                    cursor={'pointer'}
                    _hover={{ color: 'red.500' }}
                    position={'absolute'}
                    rounded={'full'}
                    bg={'white'}
                    right={'-8px'}
                    top={'-2px'}
                    onClick={() => onRemoveFile(index)}
                    className="close-icon"
                    display={['', 'none']}
                    zIndex={10}
                  />
                </Box>
              ))}
            </Flex>
          )}
        </Box>
      </Flex>

      {/* Submit: the label reflects upload progress while the request is running */}
      <Flex width="100%" justifyContent="flex-end" mt="9">
        <Button isDisabled={selectFiles.length === 0 || creating} onClick={handleSubmit(onCreate)}>
          {creating ? (
            uploadProgress >= 100 ? (
              <Box>{t('dataset:images_creating')}</Box>
            ) : (
              <Box>{t('dataset:uploading_progress', { num: uploadProgress })}</Box>
            )
          ) : selectFiles.length > 0 ? (
            <>
              <Box>
                {t('dataset:confirm_import_images', {
                  num: selectFiles.length
                })}
              </Box>
            </>
          ) : (
            <Box>{t('common:comfirn_create')}</Box>
          )}
        </Button>
      </Flex>
    </Flex>
  );
});
|
||||
@ -37,7 +37,7 @@ const ReTraining = () => {
|
||||
apiFileId: collection.apiFileId,
|
||||
|
||||
createStatus: 'waiting',
|
||||
icon: getCollectionIcon(collection.type, collection.name),
|
||||
icon: getCollectionIcon({ type: collection.type, name: collection.name }),
|
||||
id: collection._id,
|
||||
isUploading: false,
|
||||
sourceName: collection.name,
|
||||
|
||||
@ -11,6 +11,7 @@ const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
|
||||
const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile'));
|
||||
const APIDatasetCollection = dynamic(() => import('./diffSource/APIDataset'));
|
||||
const ReTraining = dynamic(() => import('./diffSource/ReTraining'));
|
||||
const ImageDataset = dynamic(() => import('./diffSource/ImageDataset'));
|
||||
|
||||
const ImportDataset = () => {
|
||||
const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
|
||||
@ -22,6 +23,8 @@ const ImportDataset = () => {
|
||||
if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText;
|
||||
if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection;
|
||||
if (importSource === ImportDataSourceEnum.apiDataset) return APIDatasetCollection;
|
||||
if (importSource === ImportDataSourceEnum.imageDataset) return ImageDataset;
|
||||
return null;
|
||||
}, [importSource]);
|
||||
|
||||
return ImportComponent ? (
|
||||
|
||||
@ -1,37 +1,39 @@
|
||||
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack } from '@chakra-ui/react';
|
||||
import { type UseFormRegister, useFieldArray, useForm } from 'react-hook-form';
|
||||
import { Box, Flex, Button, Textarea, ModalFooter, HStack, VStack, Image } from '@chakra-ui/react';
|
||||
import type { UseFormRegister } from 'react-hook-form';
|
||||
import { useFieldArray, useForm } from 'react-hook-form';
|
||||
import {
|
||||
postInsertData2Dataset,
|
||||
putDatasetDataById,
|
||||
getDatasetCollectionById,
|
||||
getDatasetDataItemById
|
||||
} from '@/web/core/dataset/api';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import { type DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import type { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import DeleteIcon from '@fastgpt/web/components/common/Icon/delete';
|
||||
import { defaultCollectionDetail } from '@/web/core/dataset/constants';
|
||||
import MyBox from '@fastgpt/web/components/common/MyBox';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import styles from './styles.module.scss';
|
||||
import {
|
||||
DatasetDataIndexTypeEnum,
|
||||
getDatasetIndexMapData
|
||||
} from '@fastgpt/global/core/dataset/data/constants';
|
||||
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import FillRowTabs from '@fastgpt/web/components/common/Tabs/FillRowTabs';
|
||||
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
|
||||
import MyIconButton from '@fastgpt/web/components/common/Icon/button';
|
||||
import MyImage from '@/components/MyImage/index';
|
||||
|
||||
export type InputDataType = {
|
||||
q: string;
|
||||
a: string;
|
||||
imagePreivewUrl?: string;
|
||||
indexes: (Omit<DatasetDataIndexItemType, 'dataId'> & {
|
||||
dataId?: string; // pg data id
|
||||
fold: boolean;
|
||||
@ -40,7 +42,8 @@ export type InputDataType = {
|
||||
|
||||
enum TabEnum {
|
||||
chunk = 'chunk',
|
||||
qa = 'qa'
|
||||
qa = 'qa',
|
||||
image = 'image'
|
||||
}
|
||||
|
||||
const InputDataModal = ({
|
||||
@ -52,17 +55,16 @@ const InputDataModal = ({
|
||||
}: {
|
||||
collectionId: string;
|
||||
dataId?: string;
|
||||
defaultValue?: { q: string; a?: string };
|
||||
defaultValue?: { q?: string; a?: string; imagePreivewUrl?: string };
|
||||
onClose: () => void;
|
||||
onSuccess: (data: InputDataType & { dataId: string }) => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { toast } = useToast();
|
||||
const { embeddingModelList, defaultModels } = useSystemStore();
|
||||
|
||||
const [currentTab, setCurrentTab] = useState(TabEnum.chunk);
|
||||
const [currentTab, setCurrentTab] = useState<TabEnum>();
|
||||
|
||||
const { register, handleSubmit, reset, control } = useForm<InputDataType>();
|
||||
const { register, handleSubmit, reset, control, watch } = useForm<InputDataType>();
|
||||
const {
|
||||
fields: indexes,
|
||||
prepend: prependIndexes,
|
||||
@ -72,16 +74,24 @@ const InputDataModal = ({
|
||||
control,
|
||||
name: 'indexes'
|
||||
});
|
||||
const imagePreivewUrl = watch('imagePreivewUrl');
|
||||
|
||||
const { data: collection = defaultCollectionDetail } = useRequest2(
|
||||
() => {
|
||||
return getDatasetCollectionById(collectionId);
|
||||
},
|
||||
() => getDatasetCollectionById(collectionId),
|
||||
{
|
||||
manual: false,
|
||||
refreshDeps: [collectionId]
|
||||
refreshDeps: [collectionId],
|
||||
onSuccess(res) {
|
||||
if (res.type === DatasetCollectionTypeEnum.images) {
|
||||
setCurrentTab(TabEnum.image);
|
||||
} else {
|
||||
setCurrentTab(TabEnum.chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Get data
|
||||
const { loading: isFetchingData } = useRequest2(
|
||||
async () => {
|
||||
if (dataId) return getDatasetDataItemById(dataId);
|
||||
@ -93,8 +103,9 @@ const InputDataModal = ({
|
||||
onSuccess(res) {
|
||||
if (res) {
|
||||
reset({
|
||||
q: res.q,
|
||||
a: res.a,
|
||||
q: res.q || '',
|
||||
a: res.a || '',
|
||||
imagePreivewUrl: res.imagePreivewUrl,
|
||||
indexes: res.indexes.map((item) => ({
|
||||
...item,
|
||||
fold: true
|
||||
@ -102,54 +113,32 @@ const InputDataModal = ({
|
||||
});
|
||||
} else if (defaultValue) {
|
||||
reset({
|
||||
q: defaultValue.q,
|
||||
a: defaultValue.a
|
||||
q: defaultValue.q || '',
|
||||
a: defaultValue.a || '',
|
||||
imagePreivewUrl: defaultValue.imagePreivewUrl
|
||||
});
|
||||
}
|
||||
|
||||
if (res?.a || defaultValue?.a) {
|
||||
setCurrentTab(TabEnum.qa);
|
||||
}
|
||||
},
|
||||
onError(err) {
|
||||
toast({
|
||||
status: 'error',
|
||||
title: t(getErrText(err) as any)
|
||||
});
|
||||
onClose();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
const maxToken = useMemo(() => {
|
||||
const vectorModel =
|
||||
embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
|
||||
defaultModels.embedding;
|
||||
|
||||
return vectorModel?.maxToken || 3000;
|
||||
}, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
|
||||
|
||||
// import new data
|
||||
// Import new data
|
||||
const { runAsync: sureImportData, loading: isImporting } = useRequest2(
|
||||
async (e: InputDataType) => {
|
||||
if (!e.q) {
|
||||
return Promise.reject(t('common:dataset.data.input is empty'));
|
||||
}
|
||||
|
||||
const totalLength = e.q.length + (e.a?.length || 0);
|
||||
if (totalLength >= maxToken * 1.4) {
|
||||
return Promise.reject(t('common:core.dataset.data.Too Long'));
|
||||
}
|
||||
|
||||
const data = { ...e };
|
||||
|
||||
const dataId = await postInsertData2Dataset({
|
||||
const postData: any = {
|
||||
collectionId: collection._id,
|
||||
q: e.q,
|
||||
a: currentTab === TabEnum.qa ? e.a : '',
|
||||
// Contains no default index
|
||||
indexes: e.indexes?.filter((item) => !!item.text?.trim())
|
||||
});
|
||||
indexes: e.indexes.filter((item) => !!item.text?.trim())
|
||||
};
|
||||
|
||||
const dataId = await postInsertData2Dataset(postData);
|
||||
|
||||
return {
|
||||
...data,
|
||||
@ -166,23 +155,26 @@ const InputDataModal = ({
|
||||
a: '',
|
||||
indexes: []
|
||||
});
|
||||
|
||||
onSuccess(e);
|
||||
},
|
||||
errorToast: t('common:error.unKnow')
|
||||
errorToast: t('dataset:common.error.unKnow')
|
||||
}
|
||||
);
|
||||
|
||||
// update
|
||||
// Update data
|
||||
const { runAsync: onUpdateData, loading: isUpdating } = useRequest2(
|
||||
async (e: InputDataType) => {
|
||||
if (!dataId) return Promise.reject(t('common:error.unKnow'));
|
||||
|
||||
await putDatasetDataById({
|
||||
const updateData: any = {
|
||||
dataId,
|
||||
q: e.q,
|
||||
a: currentTab === TabEnum.qa ? e.a : '',
|
||||
indexes: e.indexes.filter((item) => !!item.text?.trim())
|
||||
});
|
||||
};
|
||||
|
||||
await putDatasetDataById(updateData);
|
||||
|
||||
return {
|
||||
dataId,
|
||||
@ -202,10 +194,18 @@ const InputDataModal = ({
|
||||
const isLoading = isFetchingData;
|
||||
|
||||
const icon = useMemo(
|
||||
() => getSourceNameIcon({ sourceName: collection.sourceName, sourceId: collection.sourceId }),
|
||||
() => getCollectionIcon({ type: collection.type, name: collection.sourceName }),
|
||||
[collection]
|
||||
);
|
||||
|
||||
const maxToken = useMemo(() => {
|
||||
const vectorModel =
|
||||
embeddingModelList.find((item) => item.model === collection.dataset.vectorModel) ||
|
||||
defaultModels.embedding;
|
||||
|
||||
return vectorModel?.maxToken || 2000;
|
||||
}, [collection.dataset.vectorModel, defaultModels.embedding, embeddingModelList]);
|
||||
|
||||
return (
|
||||
<MyModal
|
||||
isOpen={true}
|
||||
@ -243,17 +243,19 @@ const InputDataModal = ({
|
||||
>
|
||||
{/* Tab */}
|
||||
<Box px={[5, '3.25rem']}>
|
||||
<FillRowTabs
|
||||
list={[
|
||||
{ label: t('common:dataset_data_input_chunk'), value: TabEnum.chunk },
|
||||
{ label: t('common:dataset_data_input_qa'), value: TabEnum.qa }
|
||||
]}
|
||||
py={1}
|
||||
value={currentTab}
|
||||
onChange={(e) => {
|
||||
setCurrentTab(e);
|
||||
}}
|
||||
/>
|
||||
{(currentTab === TabEnum.chunk || currentTab === TabEnum.qa) && (
|
||||
<FillRowTabs
|
||||
list={[
|
||||
{ label: t('common:dataset_data_input_chunk'), value: TabEnum.chunk },
|
||||
{ label: t('common:dataset_data_input_qa'), value: TabEnum.qa }
|
||||
]}
|
||||
py={1}
|
||||
value={currentTab}
|
||||
onChange={(e) => {
|
||||
setCurrentTab(e);
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
<Flex flex={'1 0 0'} h={['auto', '0']} gap={6} flexDir={['column', 'row']} px={[5, '0']}>
|
||||
@ -268,45 +270,64 @@ const InputDataModal = ({
|
||||
w={['100%', 0]}
|
||||
overflow={['unset', 'auto']}
|
||||
>
|
||||
<Flex flexDir={'column'} h={'100%'}>
|
||||
<FormLabel required mb={1} h={'30px'}>
|
||||
{currentTab === TabEnum.chunk
|
||||
? t('common:dataset_data_input_chunk_content')
|
||||
: t('common:dataset_data_input_q')}
|
||||
</FormLabel>
|
||||
<Textarea
|
||||
resize={'none'}
|
||||
placeholder={t('common:dataset_data_import_q_placeholder', { maxToken })}
|
||||
className={styles.scrollbar}
|
||||
maxLength={maxToken}
|
||||
flex={'1 0 0'}
|
||||
tabIndex={1}
|
||||
_focus={{
|
||||
borderColor: 'primary.500',
|
||||
boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)',
|
||||
bg: 'white'
|
||||
}}
|
||||
bg={'myGray.25'}
|
||||
borderRadius={'md'}
|
||||
borderColor={'myGray.200'}
|
||||
{...register(`q`, {
|
||||
required: true
|
||||
})}
|
||||
/>
|
||||
<Flex flexDir={'column'} flex={'1 0 0'} h={0}>
|
||||
{currentTab === TabEnum.image && (
|
||||
<>
|
||||
<FormLabel required mb={1} h={'30px'}>
|
||||
{t('file:image')}
|
||||
</FormLabel>
|
||||
<Box flex={'1 0 0'} h={0} w="100%">
|
||||
<Box height="100%" position="relative" border="base" borderRadius={'md'} p={1}>
|
||||
<MyImage
|
||||
src={imagePreivewUrl}
|
||||
h="100%"
|
||||
w="100%"
|
||||
objectFit="contain"
|
||||
alt={t('file:Image_Preview')}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
{(currentTab === TabEnum.chunk || currentTab === TabEnum.qa) && (
|
||||
<>
|
||||
<FormLabel required mb={1} h={'30px'}>
|
||||
{currentTab === TabEnum.chunk
|
||||
? t('common:dataset_data_input_chunk_content')
|
||||
: t('common:dataset_data_input_q')}
|
||||
</FormLabel>
|
||||
|
||||
<Textarea
|
||||
resize={'none'}
|
||||
className={styles.scrollbar}
|
||||
flex={'1 0 0'}
|
||||
tabIndex={1}
|
||||
_focus={{
|
||||
borderColor: 'primary.500',
|
||||
boxShadow: '0px 0px 0px 2.4px rgba(51, 112, 255, 0.15)',
|
||||
bg: 'white'
|
||||
}}
|
||||
bg={'myGray.25'}
|
||||
borderRadius={'md'}
|
||||
borderColor={'myGray.200'}
|
||||
{...register(`q`, {
|
||||
required: true
|
||||
})}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</Flex>
|
||||
{currentTab === TabEnum.qa && (
|
||||
<Flex flexDir={'column'} h={'100%'}>
|
||||
<Flex flexDir={'column'} flex={'1 0 0'}>
|
||||
<FormLabel required mb={1}>
|
||||
{t('common:dataset_data_input_a')}
|
||||
</FormLabel>
|
||||
<Textarea
|
||||
resize={'none'}
|
||||
placeholder={t('common:dataset_data_import_q_placeholder', { maxToken })}
|
||||
className={styles.scrollbar}
|
||||
flex={'1 0 0'}
|
||||
tabIndex={1}
|
||||
bg={'myGray.25'}
|
||||
maxLength={maxToken}
|
||||
borderRadius={'md'}
|
||||
border={'1.5px solid '}
|
||||
borderColor={'myGray.200'}
|
||||
@ -314,6 +335,27 @@ const InputDataModal = ({
|
||||
/>
|
||||
</Flex>
|
||||
)}
|
||||
{currentTab === TabEnum.image && (
|
||||
<Flex flexDir={'column'} flex={'1 0 0'}>
|
||||
<FormLabel required mb={1}>
|
||||
{t('file:image_description')}
|
||||
</FormLabel>
|
||||
<Textarea
|
||||
resize={'none'}
|
||||
placeholder={t('file:image_description_tip')}
|
||||
className={styles.scrollbar}
|
||||
flex={'1 0 0'}
|
||||
tabIndex={1}
|
||||
bg={'myGray.25'}
|
||||
borderRadius={'md'}
|
||||
border={'1.5px solid '}
|
||||
borderColor={'myGray.200'}
|
||||
{...register('q', {
|
||||
required: true
|
||||
})}
|
||||
/>
|
||||
</Flex>
|
||||
)}
|
||||
</Flex>
|
||||
{/* Index */}
|
||||
<Box
|
||||
|
||||
@ -9,7 +9,8 @@ import { formatFileSize } from '@fastgpt/global/common/file/tools';
|
||||
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeMap,
|
||||
DatasetCollectionTypeMap
|
||||
DatasetCollectionTypeMap,
|
||||
DatasetCollectionTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
@ -38,6 +39,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
manual: false
|
||||
}
|
||||
);
|
||||
|
||||
const metadataList = useMemo<{ label?: string; value?: any }[]>(() => {
|
||||
if (!collection) return [];
|
||||
|
||||
@ -49,13 +51,17 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
value: t(DatasetCollectionTypeMap[collection.type]?.name as any)
|
||||
},
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.source name'),
|
||||
label: t('dataset:collection_name'),
|
||||
value: collection.file?.filename || collection?.rawLink || collection?.name
|
||||
},
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.source size'),
|
||||
value: collection.file ? formatFileSize(collection.file.length) : '-'
|
||||
},
|
||||
...(collection.file
|
||||
? [
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.source size'),
|
||||
value: formatFileSize(collection.file.length)
|
||||
}
|
||||
]
|
||||
: []),
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.Createtime'),
|
||||
value: formatTime2YMDHM(collection.createTime)
|
||||
@ -64,18 +70,30 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
label: t('common:core.dataset.collection.metadata.Updatetime'),
|
||||
value: formatTime2YMDHM(collection.updateTime)
|
||||
},
|
||||
{
|
||||
label: t('dataset:collection_metadata_custom_pdf_parse'),
|
||||
value: collection.customPdfParse ? 'Yes' : 'No'
|
||||
},
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.Raw text length'),
|
||||
value: collection.rawTextLength ?? '-'
|
||||
},
|
||||
{
|
||||
label: t('dataset:collection.training_type'),
|
||||
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
|
||||
},
|
||||
...(collection.customPdfParse !== undefined
|
||||
? [
|
||||
{
|
||||
label: t('dataset:collection_metadata_custom_pdf_parse'),
|
||||
value: collection.customPdfParse ? 'Yes' : 'No'
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(collection.rawTextLength !== undefined
|
||||
? [
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.Raw text length'),
|
||||
value: collection.rawTextLength
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(DatasetCollectionDataProcessModeMap[collection.trainingType]
|
||||
? [
|
||||
{
|
||||
label: t('dataset:collection.training_type'),
|
||||
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(collection.imageIndex !== undefined
|
||||
? [
|
||||
{
|
||||
@ -92,7 +110,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(collection.chunkSize
|
||||
...(collection.chunkSize !== undefined
|
||||
? [
|
||||
{
|
||||
label: t('dataset:chunk_size'),
|
||||
@ -100,7 +118,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(collection.indexSize
|
||||
...(collection.indexSize !== undefined
|
||||
? [
|
||||
{
|
||||
label: t('dataset:index_size'),
|
||||
@ -108,7 +126,7 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
}
|
||||
]
|
||||
: []),
|
||||
...(webSelector
|
||||
...(webSelector !== undefined
|
||||
? [
|
||||
{
|
||||
label: t('common:core.dataset.collection.metadata.Web page selector'),
|
||||
@ -116,16 +134,14 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
|
||||
}
|
||||
]
|
||||
: []),
|
||||
{
|
||||
...(collection.tags
|
||||
? [
|
||||
{
|
||||
label: t('dataset:collection_tags'),
|
||||
value: collection.tags?.join(', ') || '-'
|
||||
}
|
||||
]
|
||||
: [])
|
||||
}
|
||||
...(collection.tags
|
||||
? [
|
||||
{
|
||||
label: t('dataset:collection_tags'),
|
||||
value: collection.tags?.join(', ') || '-'
|
||||
}
|
||||
]
|
||||
: [])
|
||||
];
|
||||
}, [collection, t]);
|
||||
|
||||
|
||||
@ -456,7 +456,7 @@ const TestResults = React.memo(function TestResults({
|
||||
<Box mt={1} gap={4}>
|
||||
{datasetTestItem?.results.map((item, index) => (
|
||||
<Box key={item.id} p={3} borderRadius={'lg'} bg={'myGray.100'} _notLast={{ mb: 2 }}>
|
||||
<QuoteItem quoteItem={item} canViewSource />
|
||||
<QuoteItem quoteItem={item} canViewSource canEditData />
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
|
||||
@ -25,7 +25,7 @@ const FileSelector = ({
|
||||
}: {
|
||||
fileType: string;
|
||||
selectFiles: SelectFileItemType[];
|
||||
setSelectFiles: React.Dispatch<React.SetStateAction<SelectFileItemType[]>>;
|
||||
setSelectFiles: (files: SelectFileItemType[]) => void;
|
||||
maxCount?: number;
|
||||
} & FlexProps) => {
|
||||
const { t } = useTranslation();
|
||||
@ -62,11 +62,11 @@ const FileSelector = ({
|
||||
name: file.name,
|
||||
size: formatFileSize(file.size)
|
||||
}));
|
||||
setSelectFiles((state) => {
|
||||
return [...fileList, ...state].slice(0, maxCount);
|
||||
});
|
||||
|
||||
const newFiles = [...fileList, ...selectFiles].slice(0, maxCount);
|
||||
setSelectFiles(newFiles);
|
||||
},
|
||||
[maxCount, setSelectFiles]
|
||||
[maxCount, selectFiles, setSelectFiles]
|
||||
);
|
||||
|
||||
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
|
||||
|
||||
@ -0,0 +1,138 @@
|
||||
import MyModal from '@fastgpt/web/components/common/MyModal';
|
||||
import React, { useState } from 'react';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { Box, Button, Flex, ModalBody, ModalFooter } from '@chakra-ui/react';
|
||||
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
|
||||
import MyImage from '@/components/MyImage';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import { insertImagesToCollection } from '@/web/core/dataset/image/api';
|
||||
|
||||
// Image file extensions offered by the file selector in this modal (passed as its `fileType` prop).
const fileType = '.jpg, .jpeg, .png';
|
||||
// A selected file plus the blob URL (from URL.createObjectURL) used to render its thumbnail.
type MySelectFileItemType = SelectFileItemType & { previewUrl: string };
|
||||
|
||||
/**
 * Modal for adding more images to an existing image collection.
 *
 * The user picks image files (shown as removable thumbnails) and confirms; the files are sent
 * through `insertImagesToCollection` and the modal closes on success.
 *
 * Thumbnails are backed by blob URLs created with `URL.createObjectURL`. Each URL is revoked
 * when its file is removed and all remaining ones are revoked when the modal unmounts.
 *
 * @param collectionId - Id of the collection the images are inserted into.
 * @param onClose - Called when the user cancels or after a successful insert.
 */
const InsertImageModal = ({
  collectionId,
  onClose
}: {
  collectionId: string;
  onClose: () => void;
}) => {
  const { t } = useTranslation();

  const [selectFiles, setSelectFiles] = useState<MySelectFileItemType[]>([]);
  const [uploadProgress, setUploadProgress] = useState(0);

  // Always points at the latest list so the unmount cleanup sees the current blob URLs.
  const selectFilesRef = React.useRef<MySelectFileItemType[]>(selectFiles);
  selectFilesRef.current = selectFiles;

  // Release every preview blob URL that is still alive when the modal goes away.
  React.useEffect(() => {
    return () => {
      selectFilesRef.current.forEach((item) => URL.revokeObjectURL(item.previewUrl));
    };
  }, []);

  /**
   * Append newly picked files to the list.
   * Blob URLs are created here, outside the state updater: updaters must stay pure because
   * React may invoke them more than once, which would leak blob URLs.
   */
  const onSelectFiles = (files: SelectFileItemType[]) => {
    const formatFiles = Array.from(files).map<MySelectFileItemType>((item) => ({
      ...item,
      previewUrl: URL.createObjectURL(item.file)
    }));

    setSelectFiles((pre) => [...pre, ...formatFiles]);
  };

  /** Remove the file rendered at `index` and release its preview blob URL. */
  const onRemoveFile = (index: number) => {
    const target = selectFiles[index];
    if (!target) return;

    URL.revokeObjectURL(target.previewUrl);
    // Every blob URL is unique, so it identifies the entry to drop.
    setSelectFiles((prev) => prev.filter((item) => item.previewUrl !== target.previewUrl));
  };

  const { runAsync: onInsertImages, loading: inserting } = useRequest2(
    async () => {
      // Start every attempt from 0 so a retry does not show the progress of a failed run.
      setUploadProgress(0);

      return await insertImagesToCollection({
        collectionId,
        // Only entries that actually carry a File are uploaded.
        files: selectFiles.flatMap((item) => (item.file ? [item.file] : [])),
        onUploadProgress: setUploadProgress
      });
    },
    {
      manual: true,
      successToast: t('dataset:insert_images_success'),
      onSuccess() {
        onClose();
      }
    }
  );

  return (
    <MyModal
      isOpen
      iconSrc="core/dataset/imageFill"
      title={t('dataset:insert_images')}
      maxW={['90vw', '605px']}
    >
      <ModalBody userSelect={'none'}>
        <Box>
          <FileSelector
            fileType={fileType}
            selectFiles={selectFiles}
            setSelectFiles={onSelectFiles}
          />
        </Box>
        {selectFiles.length > 0 && (
          <Flex flexWrap={'wrap'} gap={3} mt={3} width="100%">
            {selectFiles.map((file, index) => (
              <Box
                // Unique blob URL instead of the index: items can be removed from the middle.
                key={file.previewUrl}
                w="100px"
                h={'100px'}
                position={'relative'}
                _hover={{
                  '.close-icon': { display: 'block' }
                }}
                bg={'myGray.50'}
                borderRadius={'md'}
                border={'base'}
                borderStyle={'dashed'}
                p={1}
              >
                <MyImage src={file.previewUrl} w="100%" h={'100%'} objectFit={'contain'} />
                <MyIcon
                  name={'closeSolid'}
                  w={'1rem'}
                  h={'1rem'}
                  color={'myGray.700'}
                  cursor={'pointer'}
                  _hover={{ color: 'red.500' }}
                  position={'absolute'}
                  rounded={'full'}
                  bg={'white'}
                  right={'-8px'}
                  top={'-2px'}
                  onClick={() => onRemoveFile(index)}
                  className="close-icon"
                  display={['', 'none']}
                  zIndex={10}
                />
              </Box>
            ))}
          </Flex>
        )}
      </ModalBody>
      <ModalFooter>
        <Button isDisabled={inserting} variant={'whitePrimary'} mr={4} onClick={onClose}>
          {t('common:Cancel')}
        </Button>
        <Button
          isDisabled={selectFiles.length === 0 || inserting}
          variant={'primary'}
          onClick={onInsertImages}
        >
          {inserting ? (
            <Box>{t('dataset:uploading_progress', { num: uploadProgress })}</Box>
          ) : (
            <Box>{t('common:Confirm')}</Box>
          )}
        </Button>
      </ModalFooter>
    </MyModal>
  );
};
|
||||
|
||||
export default InsertImageModal;
|
||||
@ -38,7 +38,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const upload = getUploadModel({
|
||||
maxSize: global.feConfigs?.uploadFileMaxSize
|
||||
});
|
||||
const { file, bucketName, metadata, data } = await upload.doUpload<
|
||||
const { file, bucketName, metadata, data } = await upload.getUploadFile<
|
||||
UploadChatFileProps | UploadDatasetFileProps
|
||||
>(req, res);
|
||||
filePaths.push(file.path);
|
||||
|
||||
@ -15,7 +15,7 @@ export type getResDataQuery = OutLinkChatAuthProps & {
|
||||
|
||||
export type getResDataBody = {};
|
||||
|
||||
export type getResDataResponse = ChatHistoryItemResType[] | {};
|
||||
export type getResDataResponse = ChatHistoryItemResType[] | [];
|
||||
|
||||
async function handler(
|
||||
req: ApiRequestProps<getResDataBody, getResDataQuery>,
|
||||
@ -23,7 +23,7 @@ async function handler(
|
||||
): Promise<getResDataResponse> {
|
||||
const { appId, chatId, dataId, shareId } = req.query;
|
||||
if (!appId || !chatId || !dataId) {
|
||||
return {};
|
||||
return [];
|
||||
}
|
||||
|
||||
const [{ responseDetail }, chatData] = await Promise.all([
|
||||
@ -44,10 +44,10 @@ async function handler(
|
||||
]);
|
||||
|
||||
if (chatData?.obj !== ChatRoleEnum.AI) {
|
||||
return {};
|
||||
return [];
|
||||
}
|
||||
|
||||
const flowResponses = chatData.responseData ?? {};
|
||||
const flowResponses = chatData.responseData ?? [];
|
||||
return req.query.shareId
|
||||
? filterPublicNodeResponseData({
|
||||
responseDetail,
|
||||
|
||||
@ -12,6 +12,7 @@ import { quoteDataFieldSelector, type QuoteDataItemType } from '@/service/core/c
|
||||
import { processChatTimeFilter } from '@/service/core/chat/utils';
|
||||
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
|
||||
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
|
||||
import { getFormatDatasetCiteList } from '@fastgpt/service/core/dataset/data/controller';
|
||||
|
||||
export type GetCollectionQuoteProps = LinkedPaginationProps & {
|
||||
chatId: string;
|
||||
@ -139,7 +140,7 @@ async function handleInitialLoad({
|
||||
const hasMoreNext = list.length === pageSize;
|
||||
|
||||
return {
|
||||
list: processChatTimeFilter(list, chatTime),
|
||||
list: processChatTimeFilter(getFormatDatasetCiteList(list), chatTime),
|
||||
hasMorePrev: false,
|
||||
hasMoreNext
|
||||
};
|
||||
@ -164,7 +165,7 @@ async function handleInitialLoad({
|
||||
const resultList = [...prevList, centerNode, ...nextList];
|
||||
|
||||
return {
|
||||
list: processChatTimeFilter(resultList, chatTime),
|
||||
list: processChatTimeFilter(getFormatDatasetCiteList(resultList), chatTime),
|
||||
hasMorePrev,
|
||||
hasMoreNext
|
||||
};
|
||||
@ -192,7 +193,7 @@ async function handlePaginatedLoad({
|
||||
? await getPrevNodes(prevId, prevIndex, pageSize, baseMatch)
|
||||
: await getNextNodes(nextId!, nextIndex!, pageSize, baseMatch);
|
||||
|
||||
const processedList = processChatTimeFilter(list, chatTime);
|
||||
const processedList = processChatTimeFilter(getFormatDatasetCiteList(list), chatTime);
|
||||
|
||||
return {
|
||||
list: processedList,
|
||||
|
||||
@ -5,6 +5,10 @@ import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { quoteDataFieldSelector, type QuoteDataItemType } from '@/service/core/chat/constants';
|
||||
import { processChatTimeFilter } from '@/service/core/chat/utils';
|
||||
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
|
||||
import {
|
||||
formatDatasetDataValue,
|
||||
getFormatDatasetCiteList
|
||||
} from '@fastgpt/service/core/dataset/data/controller';
|
||||
|
||||
export type GetQuoteProps = {
|
||||
datasetDataIdList: string[];
|
||||
@ -56,7 +60,10 @@ async function handler(req: ApiRequestProps<GetQuoteProps>): Promise<GetQuotesRe
|
||||
quoteDataFieldSelector
|
||||
).lean();
|
||||
|
||||
const quoteList = processChatTimeFilter(list, chatItem.time);
|
||||
// Get image preview url
|
||||
const formatPreviewUrlList = getFormatDatasetCiteList(list);
|
||||
|
||||
const quoteList = processChatTimeFilter(formatPreviewUrlList, chatItem.time);
|
||||
|
||||
return quoteList;
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ async function handler(req: ApiRequestProps<backupBody, backupQuery>, res: ApiRe
|
||||
const upload = getUploadModel({
|
||||
maxSize: global.feConfigs?.uploadFileMaxSize
|
||||
});
|
||||
const { file, data } = await upload.doUpload<{ datasetId: string }>(req, res);
|
||||
const { file, data } = await upload.getUploadFile<{ datasetId: string }>(req, res);
|
||||
filePaths.push(file.path);
|
||||
|
||||
if (file.mimetype !== 'text/csv') {
|
||||
|
||||
@ -0,0 +1,104 @@
|
||||
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import type { ImageCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetCollectionDataProcessModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import type { CreateCollectionResponse } from '@/global/core/dataset/api';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
import type { NextApiResponse } from 'next';
|
||||
import { i18nT } from '@fastgpt/web/i18n/utils';
|
||||
import { authFrequencyLimit } from '@/service/common/frequencyLimit/api';
|
||||
import { addSeconds } from 'date-fns';
|
||||
import { createDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
|
||||
|
||||
/**
 * Apply the per-member upload frequency limit before accepting `num` files.
 *
 * The limit is skipped (returns `undefined`) when `uploadFileMaxAmount` is not
 * configured, including when `global.feConfigs` itself is not set. Otherwise the
 * check is delegated to `authFrequencyLimit` under the event id
 * `${tmbId}-uploadfile`, with twice the configured amount and an expiry 30 seconds
 * from now.
 *
 * @param tmbId - Team member id used to build the frequency-limit event id.
 * @param num - Number of files in the current request.
 * @returns The result of `authFrequencyLimit`, or `undefined` when no limit is configured.
 */
const authUploadLimit = (tmbId: string, num: number) => {
  // Optional chaining keeps this consistent with the `global.feConfigs?.uploadFileMaxSize`
  // access used by the upload handler, so a missing config does not throw.
  const maxAmount = global.feConfigs?.uploadFileMaxAmount;
  if (!maxAmount) return;

  return authFrequencyLimit({
    eventId: `${tmbId}-uploadfile`,
    maxAmount: maxAmount * 2,
    expiredTime: addSeconds(new Date(), 30), // 30s
    num
  });
};
|
||||
|
||||
/**
 * Create an image collection from a multipart upload.
 *
 * Steps: read the uploaded files and form data, authorize write access to the
 * dataset, apply the upload frequency limit, require a VLM model on the dataset,
 * store every image via `createDatasetImage`, then create the collection with the
 * resulting image ids. Temporary upload files are always removed afterwards.
 *
 * @returns The created collection id and the insert results.
 */
async function handler(
  req: ApiRequestProps<ImageCreateDatasetCollectionParams>,
  res: NextApiResponse<any>
): CreateCollectionResponse {
  const filePaths: string[] = [];

  // No catch clause: any failure propagates as the handler's rejection, and the
  // `finally` still cleans up the temporary files.
  try {
    const uploader = getUploadModel({ maxSize: global.feConfigs?.uploadFileMaxSize });
    const uploaded = await uploader.getUploadFiles<ImageCreateDatasetCollectionParams>(req, res);
    const { files } = uploaded;
    const { parentId, datasetId, collectionName } = uploaded.data;

    // Remember the temp paths first so they are cleaned up even if auth fails.
    for (const uploadedFile of files) {
      filePaths.push(uploadedFile.path);
    }

    const { dataset, teamId, tmbId } = await authDataset({
      datasetId,
      per: WritePermissionVal,
      req,
      authToken: true,
      authApiKey: true
    });
    await authUploadLimit(tmbId, files.length);

    if (!dataset.vlmModel) {
      return Promise.reject(i18nT('file:Image_dataset_requires_VLM_model_to_be_configured'));
    }

    // 1. Save image to db
    const imageIds = await Promise.all(
      files.map(async (file) => {
        const { imageId } = await createDatasetImage({ teamId, datasetId, file });
        return imageId;
      })
    );

    // 2. Create collection
    const created = await createCollectionAndInsertData({
      dataset,
      imageIds,
      createCollectionParams: {
        parentId,
        teamId,
        tmbId,
        datasetId,
        type: DatasetCollectionTypeEnum.images,
        name: collectionName,
        trainingType: DatasetCollectionDataProcessModeEnum.imageParse
      }
    });

    return {
      collectionId: created.collectionId,
      results: created.insertResults
    };
  } finally {
    removeFilesByPaths(filePaths);
  }
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
|
||||
// Next.js API route config: built-in body parsing is turned off for this route;
// the handler reads the request through the upload model instead.
export const config = { api: { bodyParser: false } };
|
||||
@ -21,11 +21,12 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>): CreateCo
|
||||
const upload = getUploadModel({
|
||||
maxSize: global.feConfigs?.uploadFileMaxSize
|
||||
});
|
||||
const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
|
||||
req,
|
||||
res,
|
||||
BucketNameEnum.dataset
|
||||
);
|
||||
const { file, data, bucketName } =
|
||||
await upload.getUploadFile<FileCreateDatasetCollectionParams>(
|
||||
req,
|
||||
res,
|
||||
BucketNameEnum.dataset
|
||||
);
|
||||
filePaths = [file.path];
|
||||
|
||||
if (!file || !bucketName) {
|
||||
|
||||
@ -31,7 +31,8 @@ const defaultCounts: Record<TrainingModeEnum, number> = {
|
||||
qa: 0,
|
||||
chunk: 0,
|
||||
image: 0,
|
||||
auto: 0
|
||||
auto: 0,
|
||||
imageParse: 0
|
||||
};
|
||||
|
||||
async function handler(
|
||||
|
||||
@ -1,21 +1,26 @@
|
||||
import type { NextApiRequest } from 'next';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { authDatasetData } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
|
||||
export type Response = {
|
||||
id: string;
|
||||
q: string;
|
||||
a: string;
|
||||
imageId?: string;
|
||||
source: string;
|
||||
};
|
||||
|
||||
async function handler(req: NextApiRequest) {
|
||||
const { id: dataId } = req.query as {
|
||||
id: string;
|
||||
};
|
||||
async function handler(
|
||||
req: ApiRequestProps<
|
||||
{},
|
||||
{
|
||||
id: string;
|
||||
}
|
||||
>
|
||||
) {
|
||||
const { id: dataId } = req.query;
|
||||
|
||||
// 凭证校验
|
||||
const { datasetData } = await authDatasetData({
|
||||
req,
|
||||
authToken: true,
|
||||
|
||||
@ -13,7 +13,7 @@ import { i18nT } from '@fastgpt/web/i18n/utils';
|
||||
export type GetQuoteDataResponse = {
|
||||
collection: DatasetCollectionSchemaType;
|
||||
q: string;
|
||||
a: string;
|
||||
a?: string;
|
||||
};
|
||||
|
||||
export type GetQuoteDataProps =
|
||||
|
||||
@ -10,7 +10,7 @@ import { insertData2Dataset } from '@/service/core/dataset/data/controller';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
|
||||
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
|
||||
import { type InsertOneDatasetDataProps } from '@/global/core/dataset/api';
|
||||
import type { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
|
||||
import { simpleText } from '@fastgpt/global/common/string/tools';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
@ -25,11 +25,11 @@ async function handler(req: NextApiRequest) {
|
||||
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;
|
||||
|
||||
if (!q) {
|
||||
Promise.reject(CommonErrEnum.missingParams);
|
||||
return Promise.reject(CommonErrEnum.missingParams);
|
||||
}
|
||||
|
||||
if (!collectionId) {
|
||||
Promise.reject(CommonErrEnum.missingParams);
|
||||
return Promise.reject(CommonErrEnum.missingParams);
|
||||
}
|
||||
|
||||
// 凭证校验
|
||||
@ -46,14 +46,12 @@ async function handler(req: NextApiRequest) {
|
||||
insertLen: 1
|
||||
});
|
||||
|
||||
// auth collection and get dataset
|
||||
const [
|
||||
{
|
||||
dataset: { _id: datasetId, vectorModel, agentModel }
|
||||
}
|
||||
] = await Promise.all([getCollectionWithDataset(collectionId)]);
|
||||
|
||||
// format data
|
||||
const formatQ = simpleText(q);
|
||||
const formatA = simpleText(a);
|
||||
const formatIndexes = indexes?.map((item) => ({
|
||||
@ -61,7 +59,6 @@ async function handler(req: NextApiRequest) {
|
||||
text: simpleText(item.text)
|
||||
}));
|
||||
|
||||
// token check
|
||||
const token = await countPromptTokens(formatQ + formatA, '');
|
||||
const vectorModelData = getEmbeddingModel(vectorModel);
|
||||
const llmModelData = getLLMModel(agentModel);
|
||||
@ -71,7 +68,6 @@ async function handler(req: NextApiRequest) {
|
||||
return Promise.reject(`Content over max chunk size: ${maxChunkSize}`);
|
||||
}
|
||||
|
||||
// Duplicate data check
|
||||
await hasSameValue({
|
||||
teamId,
|
||||
datasetId,
|
||||
@ -99,7 +95,7 @@ async function handler(req: NextApiRequest) {
|
||||
model: vectorModelData.model
|
||||
});
|
||||
|
||||
(async () => {
|
||||
(() => {
|
||||
addOperationLog({
|
||||
tmbId,
|
||||
teamId,
|
||||
|
||||
130
projects/app/src/pages/api/core/dataset/data/insertImages.ts
Normal file
130
projects/app/src/pages/api/core/dataset/data/insertImages.ts
Normal file
@ -0,0 +1,130 @@
|
||||
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authFrequencyLimit } from '@/service/common/frequencyLimit/api';
|
||||
import { addSeconds } from 'date-fns';
|
||||
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
|
||||
import { getUploadModel } from '@fastgpt/service/common/file/multer';
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { createDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getEmbeddingModel, getLLMModel, getVlmModel } from '@fastgpt/service/core/ai/model';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { removeDatasetImageExpiredTime } from '@fastgpt/service/core/dataset/image/utils';
|
||||
|
||||
export type insertImagesQuery = {};
|
||||
|
||||
export type insertImagesBody = {
|
||||
collectionId: string;
|
||||
};
|
||||
|
||||
export type insertImagesResponse = {};
|
||||
|
||||
/**
 * Apply the per-member upload frequency limit before accepting `num` files.
 *
 * The limit is skipped (returns `undefined`) when `uploadFileMaxAmount` is not
 * configured, including when `global.feConfigs` itself is not set. Otherwise the
 * check is delegated to `authFrequencyLimit` under the event id
 * `${tmbId}-uploadfile`, with twice the configured amount and an expiry 30 seconds
 * from now.
 *
 * @param tmbId - Team member id used to build the frequency-limit event id.
 * @param num - Number of files in the current request.
 * @returns The result of `authFrequencyLimit`, or `undefined` when no limit is configured.
 */
const authUploadLimit = (tmbId: string, num: number) => {
  // Optional chaining keeps this consistent with the `global.feConfigs?.uploadFileMaxSize`
  // access used by the upload handler, so a missing config does not throw.
  const maxAmount = global.feConfigs?.uploadFileMaxAmount;
  if (!maxAmount) return;

  return authFrequencyLimit({
    eventId: `${tmbId}-uploadfile`,
    maxAmount: maxAmount * 2,
    expiredTime: addSeconds(new Date(), 30), // 30s
    num
  });
};
|
||||
|
||||
/**
 * Append uploaded images to an existing collection.
 *
 * Steps: read the uploaded files and `collectionId`, authorize write access to the
 * collection, apply the upload frequency limit, store every image via
 * `createDatasetImage`, then — inside one mongo session — create a training usage
 * record, push one `imageParse` training item per image, and clear the images'
 * expiry. Temporary upload files are always removed afterwards.
 *
 * @returns An empty object on success.
 */
async function handler(
  req: ApiRequestProps<insertImagesBody, insertImagesQuery>,
  res: ApiResponseType<any>
): Promise<insertImagesResponse> {
  const filePaths: string[] = [];

  // No catch clause: any failure propagates as the handler's rejection, and the
  // `finally` still cleans up the temporary files.
  try {
    const uploader = getUploadModel({ maxSize: global.feConfigs?.uploadFileMaxSize });
    const uploaded = await uploader.getUploadFiles<insertImagesBody>(req, res);
    const { files } = uploaded;
    const { collectionId } = uploaded.data;

    // Remember the temp paths first so they are cleaned up even if auth fails.
    for (const uploadedFile of files) {
      filePaths.push(uploadedFile.path);
    }

    const { collection, teamId, tmbId } = await authDatasetCollection({
      collectionId,
      per: WritePermissionVal,
      req,
      authToken: true,
      authApiKey: true
    });
    const dataset = collection.dataset;

    await authUploadLimit(tmbId, files.length);

    // 1. Upload images to db
    const imageIds = await Promise.all(
      files.map(async (file) => {
        const { imageId } = await createDatasetImage({
          teamId,
          datasetId: dataset._id,
          file
        });
        return imageId;
      })
    );

    // 2. Insert images to training queue
    await mongoSessionRun(async (session) => {
      const { billId: trainingBillId } = await createTrainingUsage({
        teamId,
        tmbId,
        appName: collection.name,
        billSource: UsageSourceEnum.training,
        vectorModel: getEmbeddingModel(dataset.vectorModel)?.name,
        agentModel: getLLMModel(dataset.agentModel)?.name,
        vllmModel: getVlmModel(dataset.vlmModel)?.name,
        session
      });

      await pushDataListToTrainingQueue({
        teamId,
        tmbId,
        datasetId: dataset._id,
        collectionId,
        agentModel: dataset.agentModel,
        vectorModel: dataset.vectorModel,
        vlmModel: dataset.vlmModel,
        mode: TrainingModeEnum.imageParse,
        billId: trainingBillId,
        data: imageIds.map((imageId) => ({ imageId })),
        session
      });

      // 3. Clear ttl
      await removeDatasetImageExpiredTime({
        ids: imageIds,
        collectionId,
        session
      });
    });

    return {};
  } finally {
    removeFilesByPaths(filePaths);
  }
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
|
||||
// Next.js API route config: built-in body parsing is turned off for this route;
// the handler reads the request through the upload model instead.
export const config = { api: { bodyParser: false } };
|
||||
@ -43,7 +43,7 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
|
||||
model: vectorModel
|
||||
});
|
||||
|
||||
(async () => {
|
||||
(() => {
|
||||
addOperationLog({
|
||||
tmbId,
|
||||
teamId,
|
||||
@ -55,10 +55,6 @@ async function handler(req: ApiRequestProps<UpdateDatasetDataProps>) {
|
||||
}
|
||||
});
|
||||
})();
|
||||
} else {
|
||||
// await MongoDatasetData.findByIdAndUpdate(dataId, {
|
||||
// ...(forbid !== undefined && { forbid })
|
||||
// });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -3,10 +3,13 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
|
||||
import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { type DatasetDataListItemType } from '@/global/core/dataset/type';
|
||||
import { type PaginationProps, type PaginationResponse } from '@fastgpt/web/common/fetch/type';
|
||||
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import type { DatasetDataListItemType } from '@/global/core/dataset/type';
|
||||
import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
|
||||
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
|
||||
import { MongoDatasetImageSchema } from '@fastgpt/service/core/dataset/image/schema';
|
||||
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
|
||||
import { getDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
||||
|
||||
export type GetDatasetDataListProps = PaginationProps & {
|
||||
searchText?: string;
|
||||
@ -22,7 +25,6 @@ async function handler(
|
||||
|
||||
pageSize = Math.min(pageSize, 30);
|
||||
|
||||
// 凭证校验
|
||||
const { teamId, collection } = await authDatasetCollection({
|
||||
req,
|
||||
authToken: true,
|
||||
@ -44,7 +46,7 @@ async function handler(
|
||||
};
|
||||
|
||||
const [list, total] = await Promise.all([
|
||||
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex')
|
||||
MongoDatasetData.find(match, '_id datasetId collectionId q a chunkIndex imageId teamId')
|
||||
.sort({ chunkIndex: 1, _id: -1 })
|
||||
.skip(offset)
|
||||
.limit(pageSize)
|
||||
@ -52,8 +54,41 @@ async function handler(
|
||||
MongoDatasetData.countDocuments(match)
|
||||
]);
|
||||
|
||||
const imageIds = list.map((item) => item.imageId!).filter(Boolean);
|
||||
const imageSizeMap = new Map<string, number>();
|
||||
|
||||
if (imageIds.length > 0) {
|
||||
const imageInfos = await MongoDatasetImageSchema.find(
|
||||
{ _id: { $in: imageIds } },
|
||||
'_id length',
|
||||
{
|
||||
...readFromSecondary
|
||||
}
|
||||
).lean();
|
||||
|
||||
imageInfos.forEach((item) => {
|
||||
imageSizeMap.set(String(item._id), item.length);
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
list,
|
||||
list: list.map((item) => {
|
||||
const imageSize = item.imageId ? imageSizeMap.get(String(item.imageId)) : undefined;
|
||||
const imagePreviewUrl = item.imageId
|
||||
? getDatasetImagePreviewUrl({
|
||||
imageId: item.imageId,
|
||||
teamId,
|
||||
datasetId: collection.datasetId,
|
||||
expiredMinutes: 30
|
||||
})
|
||||
: undefined;
|
||||
|
||||
return {
|
||||
...item,
|
||||
imageSize,
|
||||
imagePreviewUrl
|
||||
};
|
||||
}),
|
||||
total
|
||||
};
|
||||
}
|
||||
|
||||
@ -45,6 +45,7 @@ async function handler(req: NextApiRequest) {
|
||||
datasetId: { $in: datasetIds }
|
||||
});
|
||||
|
||||
// Remove cron job
|
||||
await Promise.all(
|
||||
datasets.map((dataset) => {
|
||||
if (dataset.type === DatasetTypeEnum.websiteDataset)
|
||||
|
||||
57
projects/app/src/pages/api/core/dataset/image/[imageId].ts
Normal file
57
projects/app/src/pages/api/core/dataset/image/[imageId].ts
Normal file
@ -0,0 +1,57 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import type { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { authDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
||||
import { getDatasetImageReadData } from '@fastgpt/service/core/dataset/image/controller';
|
||||
|
||||
const previewableExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'];
|
||||
|
||||
/**
 * Stream a dataset image to the client.
 *
 * Requires both `imageId` and `token` in the query; responds 401 otherwise. The
 * token is checked with `authDatasetImagePreviewUrl`, then the image metadata and
 * read stream are fetched with `getDatasetImageReadData` and piped to the response.
 * Failures before streaming starts are reported as a 500 JSON response.
 *
 * NOTE(review): only `token` is passed to the auth helper here; whether the token
 * is bound to this specific `imageId` depends on that helper — confirm.
 * NOTE(review): the image is served with a one-year `public` cache lifetime even
 * though access is token-gated — confirm this is intended.
 */
export default async function handler(
  req: ApiRequestProps<
    {},
    {
      imageId: string;
      token: string;
    }
  >,
  res: NextApiResponse<any>
) {
  try {
    const { imageId, token } = req.query;

    if (!imageId || !token) {
      return jsonRes(res, {
        code: 401,
        error: 'ImageId not found'
      });
    }

    // Verify token and permissions
    await authDatasetImagePreviewUrl(token);

    const { fileInfo, stream } = await getDatasetImageReadData(imageId);

    // Set response headers
    res.setHeader('Content-Type', fileInfo.contentType);
    res.setHeader('Cache-Control', 'public, max-age=31536000');
    res.setHeader('Content-Length', fileInfo.length);

    // Register the error handler before piping so no early error is missed.
    stream.on('error', () => {
      if (res.headersSent) {
        // Part of the body is already on the wire; the only way to signal failure
        // is to tear the connection down instead of leaving it hanging.
        res.destroy();
        return;
      }

      // Nothing was sent yet: drop the image headers so the 500 is not announced
      // with the image's length/type or cached for a year.
      res.removeHeader('Content-Type');
      res.removeHeader('Cache-Control');
      res.removeHeader('Content-Length');
      res.status(500).end();
    });

    // pipe() ends the response when the source stream ends.
    stream.pipe(res);
  } catch (error) {
    return jsonRes(res, {
      code: 500,
      error
    });
  }
}
|
||||
@ -3,6 +3,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { getDatasetImagePreviewUrl } from '@fastgpt/service/core/dataset/image/utils';
|
||||
|
||||
export type getTrainingDataDetailQuery = {};
|
||||
|
||||
@ -17,8 +18,9 @@ export type getTrainingDataDetailResponse =
|
||||
_id: string;
|
||||
datasetId: string;
|
||||
mode: string;
|
||||
q: string;
|
||||
a: string;
|
||||
q?: string;
|
||||
a?: string;
|
||||
imagePreviewUrl?: string;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
@ -44,6 +46,14 @@ async function handler(
|
||||
_id: data._id,
|
||||
datasetId: data.datasetId,
|
||||
mode: data.mode,
|
||||
imagePreviewUrl: data.imageId
|
||||
? getDatasetImagePreviewUrl({
|
||||
imageId: data.imageId,
|
||||
teamId,
|
||||
datasetId,
|
||||
expiredMinutes: 30
|
||||
})
|
||||
: undefined,
|
||||
q: data.q,
|
||||
a: data.a
|
||||
};
|
||||
|
||||
@ -3,7 +3,7 @@ import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/sch
|
||||
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { type ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
import { addMinutes } from 'date-fns';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
export type updateTrainingDataBody = {
|
||||
datasetId: string;
|
||||
@ -37,21 +37,41 @@ async function handler(
|
||||
return Promise.reject('data not found');
|
||||
}
|
||||
|
||||
await MongoDatasetTraining.updateOne(
|
||||
{
|
||||
teamId,
|
||||
datasetId,
|
||||
_id: dataId
|
||||
},
|
||||
{
|
||||
$unset: { errorMsg: '' },
|
||||
retryCount: 3,
|
||||
...(q !== undefined && { q }),
|
||||
...(a !== undefined && { a }),
|
||||
...(chunkIndex !== undefined && { chunkIndex }),
|
||||
lockTime: addMinutes(new Date(), -10)
|
||||
}
|
||||
);
|
||||
// Add to chunk
|
||||
if (data.imageId && q) {
|
||||
await MongoDatasetTraining.updateOne(
|
||||
{
|
||||
teamId,
|
||||
datasetId,
|
||||
_id: dataId
|
||||
},
|
||||
{
|
||||
$unset: { errorMsg: '' },
|
||||
retryCount: 3,
|
||||
mode: TrainingModeEnum.chunk,
|
||||
...(q !== undefined && { q }),
|
||||
...(a !== undefined && { a }),
|
||||
...(chunkIndex !== undefined && { chunkIndex }),
|
||||
lockTime: new Date('2000')
|
||||
}
|
||||
);
|
||||
} else {
|
||||
await MongoDatasetTraining.updateOne(
|
||||
{
|
||||
teamId,
|
||||
datasetId,
|
||||
_id: dataId
|
||||
},
|
||||
{
|
||||
$unset: { errorMsg: '' },
|
||||
retryCount: 3,
|
||||
...(q !== undefined && { q }),
|
||||
...(a !== undefined && { a }),
|
||||
...(chunkIndex !== undefined && { chunkIndex }),
|
||||
lockTime: new Date('2000')
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
let {
|
||||
file,
|
||||
data: { appId, duration, shareId, outLinkUid, teamId: spaceTeamId, teamToken }
|
||||
} = await upload.doUpload<
|
||||
} = await upload.getUploadFile<
|
||||
OutLinkChatAuthProps & {
|
||||
appId: string;
|
||||
duration: number;
|
||||
|
||||
@ -12,6 +12,7 @@ import { TimerIdEnum } from '@fastgpt/service/common/system/timerLock/constants'
|
||||
import { addHours } from 'date-fns';
|
||||
import { getScheduleTriggerApp } from '@/service/core/app/utils';
|
||||
import { clearExpiredRawTextBufferCron } from '@fastgpt/service/common/buffer/rawText/controller';
|
||||
import { clearExpiredDatasetImageCron } from '@fastgpt/service/core/dataset/image/controller';
|
||||
|
||||
// Try to run train every minute
|
||||
const setTrainingQueueCron = () => {
|
||||
@ -85,4 +86,5 @@ export const startCron = () => {
|
||||
clearInvalidDataCron();
|
||||
scheduleTriggerAppCron();
|
||||
clearExpiredRawTextBufferCron();
|
||||
clearExpiredDatasetImageCron();
|
||||
};
|
||||
|
||||
@ -1,11 +1,13 @@
|
||||
import { type DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
export const quoteDataFieldSelector = '_id q a history updateTime chunkIndex';
|
||||
export const quoteDataFieldSelector =
|
||||
'_id teamId datasetId q a imageId history updateTime chunkIndex';
|
||||
|
||||
export type QuoteDataItemType = {
|
||||
_id: string;
|
||||
q: DatasetDataSchemaType['q'];
|
||||
a: DatasetDataSchemaType['a'];
|
||||
q: string;
|
||||
a?: string;
|
||||
imagePreivewUrl?: string;
|
||||
history?: DatasetDataSchemaType['history'];
|
||||
updateTime: DatasetDataSchemaType['updateTime'];
|
||||
index: DatasetDataSchemaType['chunkIndex'];
|
||||
|
||||
@ -1,19 +1,12 @@
|
||||
import { type DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { type QuoteDataItemType } from './constants';
|
||||
|
||||
// 获取对话时间时,引用的内容
|
||||
export function processChatTimeFilter(
|
||||
dataList: DatasetDataSchemaType[],
|
||||
dataList: QuoteDataItemType[],
|
||||
chatTime: Date
|
||||
): QuoteDataItemType[] {
|
||||
return dataList.map((item) => {
|
||||
const defaultItem = {
|
||||
_id: item._id,
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
updateTime: item.updateTime,
|
||||
index: item.chunkIndex
|
||||
};
|
||||
const defaultItem = item;
|
||||
|
||||
if (!item.history) return defaultItem;
|
||||
|
||||
@ -35,11 +28,10 @@ export function processChatTimeFilter(
|
||||
const latestHistory = history[latestHistoryIndex];
|
||||
|
||||
return {
|
||||
_id: item._id,
|
||||
...item,
|
||||
q: latestHistory.q,
|
||||
a: latestHistory.a,
|
||||
updateTime: latestHistory.updateTime,
|
||||
index: item.chunkIndex,
|
||||
updated: true
|
||||
};
|
||||
});
|
||||
|
||||
@ -18,6 +18,7 @@ import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTex
|
||||
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { countPromptTokens } from '@fastgpt/service/common/string/tiktoken';
|
||||
import { deleteDatasetImage } from '@fastgpt/service/core/dataset/image/controller';
|
||||
|
||||
const formatIndexes = async ({
|
||||
indexes = [],
|
||||
@ -142,7 +143,8 @@ export async function insertData2Dataset({
|
||||
datasetId,
|
||||
collectionId,
|
||||
q,
|
||||
a = '',
|
||||
a,
|
||||
imageId,
|
||||
chunkIndex = 0,
|
||||
indexSize = 512,
|
||||
indexes,
|
||||
@ -207,6 +209,7 @@ export async function insertData2Dataset({
|
||||
tmbId,
|
||||
datasetId,
|
||||
collectionId,
|
||||
imageId,
|
||||
q,
|
||||
a,
|
||||
chunkIndex,
|
||||
@ -391,8 +394,16 @@ export async function updateData2Dataset({
|
||||
|
||||
export const deleteDatasetData = async (data: DatasetDataItemType) => {
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 1. Delete MongoDB data
|
||||
await MongoDatasetData.deleteOne({ _id: data.id }, { session });
|
||||
await MongoDatasetDataText.deleteMany({ dataId: data.id }, { session });
|
||||
|
||||
// 2. If there are any image files, delete the image records and GridFS file.
|
||||
if (data.imageId) {
|
||||
await deleteDatasetImage(data.imageId);
|
||||
}
|
||||
|
||||
// 3. Delete vector data
|
||||
await deleteDatasetDataVector({
|
||||
teamId: data.teamId,
|
||||
idList: data.indexes.map((item) => item.dataId)
|
||||
|
||||
@ -15,6 +15,7 @@ import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { type DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import type { Document } from '@fastgpt/service/common/mongo';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { getMaxIndexSize } from '@fastgpt/global/core/dataset/training/utils';
|
||||
|
||||
const reduceQueue = () => {
|
||||
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
|
||||
@ -261,8 +262,9 @@ const insertData = async ({
|
||||
collectionId: trainingData.collectionId,
|
||||
q: trainingData.q,
|
||||
a: trainingData.a,
|
||||
imageId: trainingData.imageId,
|
||||
chunkIndex: trainingData.chunkIndex,
|
||||
indexSize: trainingData.indexSize,
|
||||
indexSize: trainingData.indexSize || getMaxIndexSize(getEmbeddingModel(trainingData.model)),
|
||||
indexes: trainingData.indexes,
|
||||
embeddingModel: trainingData.model,
|
||||
session
|
||||
|
||||
@ -1,14 +1,14 @@
|
||||
import { postUploadImg, postUploadFiles } from '@/web/common/file/api';
|
||||
import { type UploadImgProps } from '@fastgpt/global/common/file/api';
|
||||
import type { UploadImgProps } from '@fastgpt/global/common/file/api';
|
||||
import type { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { type preUploadImgProps } from '@fastgpt/global/common/file/api';
|
||||
import type { preUploadImgProps } from '@fastgpt/global/common/file/api';
|
||||
import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/file/img';
|
||||
import type { UploadChatFileProps, UploadDatasetFileProps } from '@/pages/api/common/file/upload';
|
||||
|
||||
/**
|
||||
* upload file to mongo gridfs
|
||||
*/
|
||||
export const uploadFile2DB = ({
|
||||
export const uploadFile2DB = async ({
|
||||
file,
|
||||
bucketName,
|
||||
data,
|
||||
@ -21,18 +21,21 @@ export const uploadFile2DB = ({
|
||||
metadata?: Record<string, any>;
|
||||
percentListen?: (percent: number) => void;
|
||||
}) => {
|
||||
const form = new FormData();
|
||||
form.append('metadata', JSON.stringify(metadata));
|
||||
form.append('bucketName', bucketName);
|
||||
form.append('file', file, encodeURIComponent(file.name));
|
||||
form.append('data', JSON.stringify(data));
|
||||
const formData = new FormData();
|
||||
formData.append('metadata', JSON.stringify(metadata));
|
||||
formData.append('bucketName', bucketName);
|
||||
formData.append('file', file, encodeURIComponent(file.name));
|
||||
if (data) {
|
||||
formData.append('data', JSON.stringify(data));
|
||||
}
|
||||
|
||||
return postUploadFiles(form, (e) => {
|
||||
const res = await postUploadFiles(formData, (e) => {
|
||||
if (!e.total) return;
|
||||
|
||||
const percent = Math.round((e.loaded / e.total) * 100);
|
||||
percentListen?.(percent);
|
||||
});
|
||||
return res;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -74,7 +77,6 @@ export const compressImgFileAndUpload = async ({
|
||||
resolve(reader.result as string);
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log(err);
|
||||
reject('Load image error');
|
||||
};
|
||||
});
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user