From dc329041f37c5cd229fd82fd200c4ed5c351f30c Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Wed, 5 Apr 2023 16:10:47 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=A0=B9=E6=8D=AEurl=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E7=BD=91=E7=AB=99=E6=96=87=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api/model.ts | 50 +++++- src/pages/api/model/data/exportModelData.ts | 1 - src/pages/api/model/data/fetchingUrlData.ts | 36 ++++ src/pages/api/model/data/getSplitData.ts | 2 +- .../model/detail/components/ModelDataCard.tsx | 26 ++- .../detail/components/SelectFileModal.tsx | 10 +- .../detail/components/SelectUrlModal.tsx | 168 ++++++++++++++++++ src/utils/tools.ts | 10 +- 8 files changed, 278 insertions(+), 25 deletions(-) create mode 100644 src/pages/api/model/data/fetchingUrlData.ts create mode 100644 src/pages/model/detail/components/SelectUrlModal.tsx diff --git a/src/api/model.ts b/src/api/model.ts index 1c84f9924..8f53e32c7 100644 --- a/src/api/model.ts +++ b/src/api/model.ts @@ -5,15 +5,30 @@ import { TrainingItemType } from '../types/training'; import { RequestPaging } from '../types/index'; import { Obj2Query } from '@/utils/tools'; +/** + * 获取模型列表 + */ export const getMyModels = () => GET('/model/list'); +/** + * 创建一个模型 + */ export const postCreateModel = (data: { name: string; serviceModelName: string }) => POST('/model/create', data); +/** + * 根据 ID 删除模型 + */ export const delModelById = (id: string) => DELETE(`/model/del?modelId=${id}`); +/** + * 根据 ID 获取模型 + */ export const getModelById = (id: string) => GET(`/model/detail?modelId=${id}`); +/** + * 根据 ID 更新模型 + */ export const putModelById = (id: string, data: ModelUpdateParams) => PUT(`/model/update?modelId=${id}`, data); @@ -35,29 +50,58 @@ export const getModelTrainings = (id: string) => type GetModelDataListProps = RequestPaging & { modelId: string; }; +/** + * 获取模型的知识库数据 + */ export const getModelDataList = (props: GetModelDataListProps) => GET(`/model/data/getModelData?${Obj2Query(props)}`); +/** + * 获取导出数据(不分页) + */ export const getExportDataList = (modelId: string) => GET(`/model/data/exportModelData?modelId=${modelId}`); -export const getModelSplitDataList = (modelId: string) => - GET(`/model/data/getSplitData?modelId=${modelId}`); +/** + * 获取模型正在拆分数据的数量 + */ +export const getModelSplitDataListLen = (modelId: string) => + GET(`/model/data/getSplitData?modelId=${modelId}`); +/** + * 获取 web 页面内容 + */ +export const getWebContent = (url: string) => POST(`/model/data/fetchingUrlData`, { url }); + +/** + * 手动输入数据 + */ export const postModelDataInput = (data: { modelId: string; data: { text: ModelDataSchema['text']; q: ModelDataSchema['q'] }[]; }) => POST(`/model/data/pushModelDataInput`, data); -export const postModelDataFileText = (data: { modelId: string; text: string; prompt: string }) => +/** + * 拆分数据 + */ +export const postModelDataSplitData = (data: { modelId: string; text: string; prompt: string }) => POST(`/model/data/splitData`, data); +/** + * json导入数据 + */ export const postModelDataJsonData = ( modelId: string, jsonData: { prompt: string; completion: string; vector?: number[] }[] ) => POST(`/model/data/pushModelDataJson`, { modelId, data: jsonData }); +/** + * 更新模型数据 + */ export const putModelDataById = (data: { dataId: string; text: string; q?: string }) => PUT('/model/data/putModelData', data); +/** + * 删除一条模型数据 + */ export const delOneModelData = (dataId: string) => DELETE(`/model/data/delModelDataById?dataId=${dataId}`); diff --git a/src/pages/api/model/data/exportModelData.ts b/src/pages/api/model/data/exportModelData.ts index 2c0a9fd89..314796c34 100644 --- a/src/pages/api/model/data/exportModelData.ts +++ b/src/pages/api/model/data/exportModelData.ts @@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; import { connectRedis } from '@/service/redis'; import { VecModelDataIdx } from '@/constants/redis'; -import { BufferToVector } from '@/utils/tools'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { diff --git a/src/pages/api/model/data/fetchingUrlData.ts b/src/pages/api/model/data/fetchingUrlData.ts new file mode 100644 index 000000000..9c8fba4a0 --- /dev/null +++ b/src/pages/api/model/data/fetchingUrlData.ts @@ -0,0 +1,36 @@ +import type { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@/service/response'; +import { connectToDatabase } from '@/service/mongo'; +import { authToken } from '@/service/utils/tools'; +import axios from 'axios'; +import { httpsAgent } from '@/service/utils/tools'; + +/** + * 读取网站的内容 + */ +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + const { url } = req.body as { url: string }; + if (!url) { + throw new Error('缺少 url'); + } + await connectToDatabase(); + + const { authorization } = req.headers; + + await authToken(authorization); + + const data = await axios + .get(url, { + httpsAgent + }) + .then((res) => res.data as string); + + jsonRes(res, { data }); + } catch (err) { + jsonRes(res, { + code: 500, + error: err + }); + } +} diff --git a/src/pages/api/model/data/getSplitData.ts b/src/pages/api/model/data/getSplitData.ts index 2e296cf78..8b648442f 100644 --- a/src/pages/api/model/data/getSplitData.ts +++ b/src/pages/api/model/data/getSplitData.ts @@ -24,7 +24,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) }); jsonRes(res, { - data + data: data.map((item) => item.textList).flat().length }); } catch (err) { jsonRes(res, { diff --git a/src/pages/model/detail/components/ModelDataCard.tsx b/src/pages/model/detail/components/ModelDataCard.tsx index a40341d15..ba1778853 100644 --- a/src/pages/model/detail/components/ModelDataCard.tsx +++ b/src/pages/model/detail/components/ModelDataCard.tsx @@ -24,7 +24,7 @@ import { usePagination } from '@/hooks/usePagination'; import { getModelDataList, delOneModelData, - getModelSplitDataList, + getModelSplitDataListLen, getExportDataList } from '@/api/model'; import { DeleteIcon, RepeatIcon, EditIcon } from '@chakra-ui/icons'; @@ -36,6 +36,7 @@ import type { FormData as InputDataType } from './InputDataModal'; const InputModel = dynamic(() => import('./InputDataModal')); const SelectFileModel = dynamic(() => import('./SelectFileModal')); +const SelectUrlModel = dynamic(() => import('./SelectUrlModal')); const SelectJsonModel = dynamic(() => import('./SelectJsonModal')); const ModelDataCard = ({ model }: { model: ModelSchema }) => { @@ -63,14 +64,19 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => { onOpen: onOpenSelectFileModal, onClose: onCloseSelectFileModal } = useDisclosure(); + const { + isOpen: isOpenSelectUrlModal, + onOpen: onOpenSelectUrlModal, + onClose: onCloseSelectUrlModal + } = useDisclosure(); const { isOpen: isOpenSelectJsonModal, onOpen: onOpenSelectJsonModal, onClose: onCloseSelectJsonModal } = useDisclosure(); - const { data: splitDataList, refetch } = useQuery(['getModelSplitDataList'], () => - getModelSplitDataList(model._id) + const { data: splitDataLen, refetch } = useQuery(['getModelSplitDataList'], () => + getModelSplitDataListLen(model._id) ); const refetchData = useCallback( @@ -143,14 +149,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => { 手动输入 文件导入 + 网站地址导入 JSON导入 - {splitDataList && splitDataList.length > 0 && ( - - {splitDataList.map((item) => item.textList).flat().length}条数据正在拆分... - + {!!(splitDataLen && splitDataLen > 0) && ( + {splitDataLen}条数据正在拆分... )} @@ -236,6 +241,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => { onSuccess={refetchData} /> )} + {isOpenSelectUrlModal && ( + + )} {isOpenSelectJsonModal && ( { if (!fileText) return; - await postModelDataFileText({ + await postModelDataSplitData({ modelId, text: fileText, prompt: `下面是${prompt || '一段长文本'}` @@ -126,10 +127,11 @@ const SelectFileModal = ({ 支持 {fileExtension} 文件。模型会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗 - tokens,大约0.04元/1k tokens,请确保账号余额充足。 + tokens,账号余额不足时,未拆分的数据会被删除。 - 一共 {fileText.length} 个字,{encode(fileText).length} 个tokens + 一共 {encode(fileText).length} 个tokens,大约 {formatPrice(encode(fileText).length * 4)} + 元 diff --git a/src/pages/model/detail/components/SelectUrlModal.tsx b/src/pages/model/detail/components/SelectUrlModal.tsx new file mode 100644 index 000000000..7912eb3b6 --- /dev/null +++ b/src/pages/model/detail/components/SelectUrlModal.tsx @@ -0,0 +1,168 @@ +import React, { useState } from 'react'; +import { + Box, + Flex, + Button, + Modal, + ModalOverlay, + ModalContent, + ModalHeader, + ModalCloseButton, + ModalBody, + Input, + Textarea +} from '@chakra-ui/react'; +import { useToast } from '@/hooks/useToast'; +import { customAlphabet } from 'nanoid'; +import { encode } from 'gpt-token-utils'; +import { useConfirm } from '@/hooks/useConfirm'; +import { useMutation } from '@tanstack/react-query'; +import { postModelDataSplitData, getWebContent } from '@/api/model'; +import { formatPrice } from '@/utils/user'; + +const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); + +const SelectUrlModal = ({ + onClose, + onSuccess, + modelId +}: { + onClose: () => void; + onSuccess: () => void; + modelId: string; +}) => { + const { toast } = useToast(); + const [webUrl, setWebUrl] = useState(''); + const [webText, setWebText] = useState(''); + const [prompt, setPrompt] = useState(''); // 提示词 + const { openConfirm, ConfirmChild } = useConfirm({ + content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务讲被终止。' + }); + + const { mutate: onclickImport, isLoading: isImporting } = useMutation({ + mutationFn: async () => { + if (!webText) return; + await postModelDataSplitData({ + modelId, + text: webText, + prompt: `下面是${prompt || '一段长文本'}` + }); + toast({ + title: '导入数据成功,需要一段拆解和训练', + status: 'success' + }); + onClose(); + onSuccess(); + }, + onError(error) { + console.log(error); + toast({ + title: '导入数据失败', + status: 'error' + }); + } + }); + + const { mutate: onclickFetchingUrl, isLoading: isFetching } = useMutation({ + mutationFn: async () => { + if (!webUrl) return; + const res = await getWebContent(webUrl); + const parser = new DOMParser(); + const htmlDoc = parser.parseFromString(res, 'text/html'); + const data = htmlDoc?.body?.innerText || ''; + + if (!data) { + throw new Error('获取不到数据'); + } + setWebText(data.replace(/\s+/g, ' ')); + }, + onError(error) { + console.log(error); + toast({ + status: 'error', + title: '获取网站内容失败' + }); + } + }); + + return ( + + + + 网站地址导入 + + + + + 根据网站地址,获取网站文本内容(请注意获取后的内容,不是每个网站内容都能获取到的)。模型会对文本进行 + QA 拆分,需要较长训练时间,拆分需要消耗 tokens,账号余额不足时,未拆分的数据会被删除。 + + + 一共 {encode(webText).length} 个tokens,大约 {formatPrice(encode(webText).length * 4)}元 + + + 网站地址 + setWebUrl(e.target.value)} + size={'sm'} + /> + + + + + 下面是 + + setPrompt(e.target.value)} + size={'sm'} + /> + +