/**
 * ChunkImport: knowledge-base import screen for the "chunk" (direct split) mode.
 *
 * Props:
 *   - kbId: id of the knowledge base; it is sent with every upload request and
 *     used in the route query after a successful import.
 *
 * Data used by the component (all visible below):
 *   - model / unitPrice come from the first entry of vectorModelList, falling back
 *     to 'text-embedding-ada-002' and 0.2 when that entry (or its field) is falsy.
 *   - State: chunkLen (initial 500), showRePreview, files, previewFile, successChunks.
 *   - totalChunk is the sum of chunks.length over all files; price is
 *     formatPrice(sum of file.tokens * unitPrice).
 *
 * Upload (useMutation -> onclickUpload):
 *   - Flattens every file's chunks into one list and posts it to postKbDataFromList
 *     sequentially in slices of 500 with mode TrainingModeEnum.index.
 *   - Adds each response's insertLen to a running total that is mirrored into
 *     successChunks, then shows a success toast and calls router.replace with
 *     query { kbId, currentTab: 'data' }.
 *   - onError shows an error toast built with getErrText.
 *
 * onRePreview:
 *   - Re-splits every file's text with splitText2Chunks({ text, maxLen: chunkLen }),
 *     rebuilds chunks as { q: chunk, a: '', source: file.filename }, updates tokens,
 *     then clears previewFile and showRePreview.
 *
 * Rendered output (as far as it can be read here):
 *   - A file picker whose callback prepends newly pushed files to the list.
 *   - Per-file rows that select the file for preview or remove it by id.
 *   - A chunk-length input that sets chunkLen and raises showRePreview.
 *   - Either the raw text of previewFile (editing it replaces that file's text and
 *     raises showRePreview) or a chunk preview limited to slice(0, 50) per file,
 *     where a chunk can be removed or edited (an empty edit removes the chunk).
 *
 * NOTE(review): in this copy the JSX element tags and the generic arguments of
 *   useState appear to be missing, and the original line breaks are gone (so the
 *   original `//` comments now run to the end of the physical line). Confirm
 *   against the repository version before relying on the markup.
 * NOTE(review): both chunk-removal updaters spread the render-time `file` instead
 *   of the updater's `stateFile`, unlike the "update file" branch; confirm this
 *   cannot drop concurrent edits.
 * NOTE(review): the try/catch in onRePreview wraps setFiles with a functional
 *   updater; whether an exception thrown by splitText2Chunks inside that updater
 *   reaches the catch depends on when React invokes the updater. TODO confirm.
 * NOTE(review): `price || 0.2` also replaces a configured price of 0; confirm that
 *   is intended.
 * NOTE(review): the "partial display" hint is shown when totalChunk > 100 while the
 *   preview itself is limited to 50 chunks per file; confirm the threshold.
 */
import React, { useState, useCallback, useMemo } from 'react'; import { Box, Flex, Button, useTheme, NumberInput, NumberInputField, NumberInputStepper, NumberIncrementStepper, NumberDecrementStepper, Image } from '@chakra-ui/react'; import { useToast } from '@/hooks/useToast'; import { useConfirm } from '@/hooks/useConfirm'; import { useRouter } from 'next/router'; import { useMutation } from '@tanstack/react-query'; import { postKbDataFromList } from '@/api/plugins/kb'; import { splitText2Chunks } from '@/utils/file'; import { getErrText } from '@/utils/tools'; import { formatPrice } from '@/utils/user'; import { vectorModelList } from '@/store/static'; import MyIcon from '@/components/Icon'; import CloseIcon from '@/components/Icon/close'; import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete'; import MyTooltip from '@/components/MyTooltip'; import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { TrainingModeEnum } from '@/constants/plugin'; import FileSelect, { type FileItemType } from './FileSelect'; const fileExtension = '.txt, .doc, .docx, .pdf, .md'; const ChunkImport = ({ kbId }: { kbId: string }) => { const model = vectorModelList[0]?.model || 'text-embedding-ada-002'; const unitPrice = vectorModelList[0]?.price || 0.2; const theme = useTheme(); const router = useRouter(); const { toast } = useToast(); const [chunkLen, setChunkLen] = useState(500); const [showRePreview, setShowRePreview] = useState(false); const [files, setFiles] = useState([]); const [previewFile, setPreviewFile] = useState(); const [successChunks, setSuccessChunks] = useState(0); const totalChunk = useMemo( () => files.reduce((sum, file) => sum + file.chunks.length, 0), [files] ); const emptyFiles = useMemo(() => files.length === 0, [files]); // price count const price = useMemo(() => { return formatPrice(files.reduce((sum, file) => sum + file.tokens, 0) * unitPrice); }, [files, unitPrice]); const { openConfirm, ConfirmModal } = useConfirm({ content: 
// Confirm dialog text, in English: "This task cannot be terminated and needs some time to build the index; please confirm the import. If the balance is insufficient, unfinished tasks are paused and can continue after a top-up."
`该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。` }); const { mutate: onclickUpload, isLoading: uploading } = useMutation({ mutationFn: async () => { const chunks = files.map((file) => file.chunks).flat(); // subsection import let success = 0; const step = 500; for (let i = 0; i < chunks.length; i += step) { const { insertLen } = await postKbDataFromList({ kbId, model, data: chunks.slice(i, i + step), mode: TrainingModeEnum.index }); success += insertLen; setSuccessChunks(success); } toast({ title: `去重后共导入 ${success} 条数据,请耐心等待训练.`, status: 'success' }); router.replace({ query: { kbId, currentTab: 'data' } }); }, onError(err) { toast({ title: getErrText(err, '导入文件失败'), status: 'error' }); } }); const onRePreview = useCallback(async () => { try { setFiles((state) => state.map((file) => { const splitRes = splitText2Chunks({ text: file.text, maxLen: chunkLen }); return { ...file, tokens: splitRes.tokens, chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '', source: file.filename })) }; }) ); setPreviewFile(undefined); setShowRePreview(false); } catch (error) { toast({ status: 'warning', title: getErrText(error, '文本分段异常') }); } }, [chunkLen, toast]); const filenameStyles = { className: 'textEllipsis', maxW: '400px' }; return ( { setFiles((state) => files.concat(state)); }} chunkLen={chunkLen} py={emptyFiles ? '100px' : 5} /> {!emptyFiles && ( <> {files.map((item) => ( setPreviewFile(item)} > {''} {item.filename} { e.stopPropagation(); setFiles((state) => state.filter((file) => file.id !== item.id)); }} /> ))} {/* chunk size */} 段落长度 { setChunkLen(+e); setShowRePreview(true); }} > {/* price */} 预估价格 {price}元 {showRePreview && ( )} )} {!emptyFiles && ( {previewFile ? ( {previewFile.filename} setPreviewFile(undefined)} /> { // @ts-ignore const val = e.target.innerText; setShowRePreview(true); setFiles((state) => state.map((file) => file.id === previewFile.id ? 
// Only the file currently shown in the raw-text preview gets its text replaced; every other file is returned unchanged.
{ ...file, text: val } : file ) ); }} /> ) : ( 分段预览({totalChunk}组) {totalChunk > 100 && ( 仅展示部分 )} {files.map((file) => file.chunks.slice(0, 50).map((chunk, i) => ( # {i + 1} {file.filename} { setFiles((state) => state.map((stateFile) => stateFile.id === file.id ? { ...file, chunks: [ ...file.chunks.slice(0, i), ...file.chunks.slice(i + 1) ] } : stateFile ) ); }} /> { // @ts-ignore const val = e.target.innerText; /* delete file */ if (val === '') { setFiles((state) => state.map((stateFile) => stateFile.id === file.id ? { ...file, chunks: [ ...file.chunks.slice(0, i), ...file.chunks.slice(i + 1) ] } : stateFile ) ); } else { // update file setFiles((stateFiles) => stateFiles.map((stateFile) => file.id === stateFile.id ? { ...stateFile, chunks: stateFile.chunks.map((chunk, index) => ({ ...chunk, q: i === index ? val : chunk.q })) } : stateFile ) ); } }} /> )) )} )} )} ); }; export default ChunkImport;