Archer 2c89752f67
feat: pg vector 0.8.0;perf: app pdf enhance parse (#3962)
* perf: app pdf enhance parse

* feat: pg vector 0.8.0

* update schema default

* model sort and default image

* perf: i18n

* perf: ui tip
2025-03-05 15:09:46 +08:00

77 lines
2.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextApiRequest, NextApiResponse } from 'next';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { PG_ADDRESS } from '@fastgpt/service/common/vectorStore/constants';
/**
 * Switches every dataset collection whose `trainingType` is `auto` over to
 * `chunk`, and sets `autoIndexes: true` on each of those collections in the
 * same update.
 */
const updateCollections = async () => {
  // Only collections still using the `auto` processing mode are touched.
  const legacyAutoFilter = {
    trainingType: DatasetCollectionDataProcessModeEnum.auto
  };

  const switchToChunk = {
    $set: {
      trainingType: DatasetCollectionDataProcessModeEnum.chunk,
      autoIndexes: true
    }
  };

  await MongoDatasetCollection.updateMany(legacyAutoFilter, switchToChunk);
};
/**
 * Back-fills the `type` field on every entry of the `indexes` array of each
 * dataset data document that has an `indexes` field.
 *
 * An entry whose `defaultIndex` equals `true` is tagged
 * `DatasetDataIndexTypeEnum.default`; every other entry is tagged
 * `DatasetDataIndexTypeEnum.custom`.
 *
 * An entry that already carries a non-null `type` keeps it, so calling this
 * migration more than once (or after new data with an explicit type has been
 * written) does not reset those types to `custom`.
 */
const updateData = async () => {
  await MongoDatasetData.updateMany({ indexes: { $exists: true } }, [
    {
      $set: {
        indexes: {
          $map: {
            input: '$indexes',
            as: 'index',
            in: {
              // Later objects win in $mergeObjects, so `type` is layered on top
              // of the untouched original entry.
              $mergeObjects: [
                '$$index',
                {
                  type: {
                    // Keep an existing type; only derive one when it is
                    // missing or null.
                    $ifNull: [
                      '$$index.type',
                      {
                        $cond: {
                          if: { $eq: ['$$index.defaultIndex', true] },
                          then: DatasetDataIndexTypeEnum.default,
                          else: DatasetDataIndexTypeEnum.custom
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        }
      }
    }
  ]);
};
/**
 * Issues `ALTER EXTENSION vector UPDATE` through `PgClient`.
 * Does nothing when `PG_ADDRESS` is falsy.
 */
const upgradePgVector = async () => {
  if (PG_ADDRESS) {
    // Statement text is kept verbatim, including its surrounding newlines.
    const upgradeStatement = `
ALTER EXTENSION vector UPDATE;
`;
    await PgClient.query(upgradeStatement);
  }
};
/**
 * Migration endpoint: after `authCert` succeeds (called with `authRoot: true`),
 * runs the three upgrade steps one after another, logging a label before each.
 *
 * @param req  Incoming request, forwarded to `authCert`.
 * @param _res Unused.
 * @returns `{ success: true }` once every step has completed.
 */
async function handler(req: NextApiRequest, _res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  // Ordered list of steps; each one is awaited before the next label is logged.
  // NOTE(review): the last label mentions `autoIndex`, while the data migration
  // step keys off `defaultIndex` - confirm the wording.
  const steps: Array<{ label: string; run: () => Promise<void> }> = [
    { label: '升级 PG vector 插件', run: upgradePgVector },
    { label: '变更所有 collection 的 trainingType 为 chunk', run: updateCollections },
    {
      label: "更新所有 data 的 index, autoIndex=true 的增加type='default',其他的增加 type='custom'",
      run: updateData
    }
  ];

  for (const { label, run } of steps) {
    console.log(label);
    await run();
  }

  return { success: true };
}
export default NextAPI(handler);