Archer 9d27de154b
4.7-alpha2 (#1027)
* feat: stop toolCall and rename some field. (#46)

* perf: node delete tip;pay tip

* fix: toolCall cannot save child answer

* feat: stop tool

* fix: team modal

* fix feedbackModal auth bug (#47)

* 简单的支持提示词运行tool。优化workflow模板 (#49)

* remove templates

* fix: request body undefined

* feat: prompt tool run

* feat: workflow templates modal

* perf: plugin start

* 4.7 (#50)

* fix docker-compose download url (#994)

The original URL was invalid and returned '404 NOT FOUND'.
Fix the docker-compose download URL by adding 'v' before the docker-compose version.

* Update ai_settings.md (#1000)

* Update configuration.md

* Update configuration.md

* Fix history in classifyQuestion and extract modules (#1012)

* Fix history in classifyQuestion and extract modules

* Add chatValue2RuntimePrompt import and update text formatting

* flow controller to packages

* fix: rerank select

* modal ui

* perf: modal code path

* point not sufficient

* feat: http url support variable

* fix http key

* perf: prompt

* perf: ai setting modal

* simple edit ui

---------

Co-authored-by: entorick <entorick11@qq.com>
Co-authored-by: liujianglc <liujianglc@163.com>
Co-authored-by: Fengrui Liu <liufengrui.work@bytedance.com>

* fix team share redirect to login (#51)

* feat: support openapi import plugins (#48)

* feat: support openapi import plugins

* feat: import from url

* fix: add body params parse

* fix build

* fix

* fix

* fix

* tool box ui (#52)

* fix: training queue

* feat: simple edit tool select

* perf: simple edit dataset prompt

* fix: chatbox tool ux

* feat: quote prompt module

* perf: plugin tools sign

* perf: model avatar

* tool selector ui

* feat: max histories

* perf: http plugin import (#53)

* perf: plugin http import

* chatBox ui

* perf: name

* fix: Node template card (#54)

* fix: ts

* setting modal

* package

* package

* feat: add plugins search (#57)

* feat: add plugins search

* perf: change http plugin header input

* Yjl (#56)

* perf: prompt tool call

* perf: chat box ux

* doc

* doc

* price tip

* perf: tool selector

* ui'

* fix: vector queue

* fix: empty tool and empty response

* fix: empty msg

* perf: pg index

* perf: ui tip

* doc

* tool tip

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: entorick <entorick11@qq.com>
Co-authored-by: liujianglc <liujianglc@163.com>
Co-authored-by: Fengrui Liu <liufengrui.work@bytedance.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
2024-03-21 13:32:31 +08:00

183 lines
4.6 KiB
TypeScript

import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetTraining } from './schema';
import type {
PushDatasetDataChunkProps,
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import { getCollectionWithDataset } from '../controller';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import type { VectorModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
/**
 * Sets `lockTime` to 2999/5/5 on every training-queue record that belongs to
 * the given team.
 *
 * NOTE(review): presumably the far-future `lockTime` keeps queue consumers
 * from picking these records up — confirm against the consumer query.
 *
 * This is a best-effort operation: a database failure is logged and swallowed,
 * so the returned promise never rejects.
 *
 * @param teamId - Team whose training records should be locked.
 * @returns A promise that resolves (with no value) once the update has been
 *          attempted.
 */
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
  try {
    await MongoDatasetTraining.updateMany(
      {
        teamId
      },
      {
        lockTime: new Date('2999/5/5')
      }
    );
  } catch (error) {
    // Keep the best-effort contract (callers never see a rejection), but do not
    // hide the failure: a silent miss here is otherwise impossible to diagnose.
    console.error(`Failed to lock training data for team ${teamId}`, error);
  }
};
/**
 * Normalises, filters and enqueues dataset chunks into the training queue.
 *
 * Steps:
 *  1. Resolve the dataset behind `collectionId` and check that both its agent
 *     (LLM) model and its vector model exist in the globally registered lists.
 *  2. Normalise `q`, `a` and every index text with `simpleText`.
 *     NOTE: this mutates the items of `data` in place.
 *  3. Sort every item into one of: `error` (empty `q`), `overToken` (`q` has
 *     more tokens than the mode's limit), `repeat` (same `q + a` already seen
 *     in this call) or `success`.
 *  4. Insert the `success` items in batches of 50, one batch after another.
 *
 * @param teamId       - Owning team, stored on every queued record.
 * @param tmbId        - Team-member id, stored on every queued record.
 * @param collectionId - Collection the data belongs to.
 * @param data         - Chunks to enqueue (normalised in place).
 * @param prompt       - Stored on every queued record.
 * @param billId       - Stored on every queued record.
 * @param trainingMode - Defaults to `TrainingModeEnum.chunk`.
 * @returns `insertLen` (number of records inserted) plus the `overToken`,
 *          `repeat` and `error` item lists.
 * @throws Rejects with a string message when the agent model, the vector model
 *         or the training mode is invalid; rejects with the underlying error
 *         when a batch insert still fails after 3 retries.
 */
export async function pushDataListToTrainingQueue({
  teamId,
  tmbId,
  collectionId,
  data,
  prompt,
  billId,
  trainingMode = TrainingModeEnum.chunk
}: {
  teamId: string;
  tmbId: string;
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
  const vectorModelList = global.vectorModels;
  const datasetModelList = global.llmModels;

  const {
    datasetId: { _id: datasetId, vectorModel, agentModel }
  } = await getCollectionWithDataset(collectionId);

  // Both models must be registered, whichever mode is requested. The mode then
  // decides which model's limits apply to the queued records.
  const checkModelValid = async () => {
    const agentModelData = datasetModelList?.find((item) => item.model === agentModel);
    if (!agentModelData) {
      return Promise.reject(`File model ${agentModel} is inValid`);
    }
    const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
    if (!vectorModelData) {
      return Promise.reject(`Vector model ${vectorModel} is inValid`);
    }

    if (trainingMode === TrainingModeEnum.chunk) {
      return {
        maxToken: vectorModelData.maxToken * 1.3,
        model: vectorModelData.model,
        weight: vectorModelData.weight
      };
    }

    if (trainingMode === TrainingModeEnum.qa || trainingMode === TrainingModeEnum.auto) {
      return {
        maxToken: agentModelData.maxContext * 0.8,
        model: agentModelData.model,
        weight: 0
      };
    }

    return Promise.reject(`Training mode "${trainingMode}" is inValid`);
  };

  const { model, maxToken, weight } = await checkModelValid();

  // Normalise q, a and index texts. Items are updated in place, so the lists
  // returned to the caller contain the normalised values.
  data.forEach((item) => {
    item.q = simpleText(item.q);
    item.a = simpleText(item.a);
    item.indexes = item.indexes?.map((index) => ({
      ...index,
      text: simpleText(index.text)
    }));
  });

  // Classify every item. `seenTexts` holds the q + a of every accepted item so
  // that later duplicates within this call are reported as `repeat`.
  const seenTexts = new Set<string>();
  const filterResult: Record<
    'success' | 'overToken' | 'repeat' | 'error',
    PushDatasetDataChunkProps[]
  > = {
    success: [],
    overToken: [],
    repeat: [],
    error: []
  };

  data.forEach((item) => {
    if (!item.q) {
      filterResult.error.push(item);
      return;
    }

    const text = item.q + item.a;

    // Only q is counted against the token limit.
    const token = countPromptTokens(item.q);
    if (token > maxToken) {
      filterResult.overToken.push(item);
      return;
    }

    if (seenTexts.has(text)) {
      console.log('repeat', item);
      filterResult.repeat.push(item);
    } else {
      filterResult.success.push(item);
      seenTexts.add(text);
    }
  });

  // Insert one batch. On failure, wait 500 ms and retry up to `retry` more
  // times before rejecting with the last error.
  // NOTE(review): if `insertMany` persisted part of the batch before failing,
  // a retry re-inserts the whole batch — confirm whether duplicates can occur.
  const insertData = async (dataList: PushDatasetDataChunkProps[], retry = 3): Promise<number> => {
    try {
      const results = await MongoDatasetTraining.insertMany(
        dataList.map((item) => ({
          teamId,
          tmbId,
          datasetId,
          collectionId,
          billId,
          mode: trainingMode,
          prompt,
          model,
          q: item.q,
          a: item.a,
          chunkIndex: item.chunkIndex ?? 0,
          weight: weight ?? 0,
          indexes: item.indexes
        }))
      );
      // Pause between batches.
      await delay(500);
      return results.length;
    } catch (error) {
      if (retry > 0) {
        await delay(500);
        return insertData(dataList, retry - 1);
      }
      return Promise.reject(error);
    }
  };

  const { success, ...rejected } = filterResult;

  // Batches are inserted sequentially on purpose. Slicing (instead of seeding a
  // reducer with one empty batch) means an empty `success` list performs no
  // insert and no delay.
  const chunkSize = 50;
  let insertLen = 0;
  for (let start = 0; start < success.length; start += chunkSize) {
    insertLen += await insertData(success.slice(start, start + chunkSize));
  }

  return {
    insertLen,
    ...rejected
  };
}