perf: redirect request and err log replace (#768)
perf: dataset openapi
This commit is contained in:
parent 379673cae1
commit 318116627c
@@ -342,7 +342,7 @@ data is the collection ID.
 {{< /tabs >}}


-### Create a plain text collection (Commercial Edition)
+### Create a plain text collection

 Pass in a piece of text to create a collection; the text passed in will be split into chunks.

@@ -351,7 +351,7 @@ data is the collection ID.
 {{< markdownify >}}

 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
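The request body is truncated by the diff context above. For orientation, a hedged TypeScript sketch of a full call to the relocated route follows; the field names (`datasetId`, `name`, `text`, `trainingType`, `chunkSize`) come from what the new `create/text` handler below destructures, while the ID, key, and the `'chunk'` enum string are placeholder assumptions.

```ts
// Hedged sketch: calling the open-source text-collection route with fetch.
// Field names mirror the handler's req.body; all values are placeholders.
const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/text', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer YOUR_API_KEY', // placeholder credential
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    datasetId: 'DATASET_ID', // placeholder dataset ID
    name: 'My text collection',
    text: 'Raw text that the server will split into chunks...',
    trainingType: 'chunk', // assumed string value of TrainingModeEnum.chunk
    chunkSize: 512 // the handler's default
  })
});
const json = await res.json();
console.log(json.data); // { collectionId, results } on success
```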
@@ -418,7 +418,7 @@ data is the collection ID.
 {{< /tab >}}
 {{< /tabs >}}

-### Create a link collection (Commercial Edition)
+### Create a link collection

 Pass in a web link to create a collection; the corresponding page's content is crawled first, and the crawled text is then split.

@@ -427,7 +427,7 @@ data is the collection ID.
 {{< markdownify >}}

 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
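The same pattern applies to the link route. A hedged sketch, with placeholder ID and key, and body fields mirroring the `create/link` handler below:

```ts
// Hedged sketch: calling the link-collection route after the proApi -> core move.
const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/link', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer YOUR_API_KEY', // placeholder credential
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    datasetId: 'DATASET_ID', // placeholder dataset ID
    link: 'https://example.com/docs/page', // page to crawl and split
    trainingType: 'chunk', // assumed string value of TrainingModeEnum.chunk
    chunkSize: 512
  })
});
const json = await res.json();
console.log(json.data); // { collectionId } on success
```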
@@ -0,0 +1,88 @@
```ts
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      link,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as LinkCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, new Array(10))
    });

    // 2. create collection
    const collectionId = await createOneCollection({
      ...body,
      name: link,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.link,

      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,

      rawLink: link
    });

    // 3. create bill and start sync
    const { billId } = await createTrainingBill({
      teamId,
      tmbId,
      appName: 'core.dataset.collection.Sync Collection',
      billSource: BillSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel).name,
      agentModel: getQAModel(dataset.agentModel).name
    });
    await reloadCollectionChunks({
      collectionId,
      tmbId,
      billId
    });

    jsonRes(res, {
      data: { collectionId }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
```
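One design note on the handler above: because the page has not been crawled yet, the limit check reserves capacity with a fixed `new Array(10)` placeholder rather than a real chunk count; the actual fetch-and-split happens later in `reloadCollectionChunks`. A minimal route-level test sketch follows, assuming `node-mocks-http` (not necessarily the project's own harness) and a hypothetical relative import path:

```ts
import { createMocks } from 'node-mocks-http';
import handler from './link'; // hypothetical path to the route file above

async function main() {
  const { req, res } = createMocks({
    method: 'POST',
    body: {
      datasetId: 'DATASET_ID', // placeholder
      link: 'https://example.com/article'
    }
  });
  // The handler authenticates via authDataset, so a real token or API key
  // and a live database would be required; this only shows the wiring.
  await handler(req as any, res as any);
  console.log(res._getJSONData()); // { data: { collectionId } } on success
}

main().catch(console.error);
```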
@@ -0,0 +1,117 @@
```ts
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      name,
      text,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as TextCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. split text to chunks
    const { chunks } = splitText2Chunks({
      text,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : []
    });

    // 2. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 3. create collection and training bill
    const [collectionId, { billId }] = await Promise.all([
      createOneCollection({
        ...body,
        teamId,
        tmbId,
        type: DatasetCollectionTypeEnum.virtual,

        name,
        trainingType,
        chunkSize,
        chunkSplitter,
        qaPrompt,

        hashRawText: hashStr(text),
        rawTextLength: text.length
      }),
      createTrainingBill({
        teamId,
        tmbId,
        appName: name,
        billSource: BillSourceEnum.training,
        vectorModel: getVectorModel(dataset.vectorModel)?.name,
        agentModel: getQAModel(dataset.agentModel)?.name
      })
    ]);

    // 4. push chunks to training queue
    const insertResults = await pushDataToTrainingQueue({
      teamId,
      tmbId,
      collectionId,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: chunks.map((text, index) => ({
        q: text,
        chunkIndex: index
      }))
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

export const config = {
  api: {
    bodyParser: {
      sizeLimit: '10mb'
    }
  }
};
```
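The text route, by contrast, splits synchronously before anything is written, so the limit check runs against real chunk counts. The splitting step can be exercised in isolation; a hedged sketch using the same parameters the handler passes for chunk-mode training (`overlapRatio` 0.2, default `chunkLen` 512):

```ts
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const sampleText = 'A long document body...'; // any raw text
const { chunks } = splitText2Chunks({
  text: sampleText,
  chunkLen: 512, // the handler's default chunkSize
  overlapRatio: 0.2, // 20% overlap between adjacent chunks in chunk mode
  customReg: [] // optional split patterns, e.g. a user-supplied chunkSplitter
});
console.log(`split into ${chunks.length} chunks`);
```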
@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
 export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
   POST<string>(`/core/dataset/collection/create`, data);
 export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
-  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);

 export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
   POST(`/core/dataset/collection/update`, data);
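With the helper now pointing at the open-source route, calling code needs no other change. A hedged usage sketch, with placeholder values, assuming the `POST` wrapper resolves to the payload named by its generic parameter:

```ts
// Hedged sketch: the helper resolves to { collectionId } per its generic type.
const { collectionId } = await postCreateDatasetLinkCollection({
  datasetId: 'DATASET_ID', // placeholder
  link: 'https://example.com/docs',
  trainingType: 'chunk', // assumed string value of TrainingModeEnum.chunk
  chunkSize: 512
} as LinkCreateDatasetCollectionParams);
console.log(collectionId);
```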