perf: slow query of full text search (#3044)

This commit is contained in:
Finley Ge 2024-11-04 14:01:20 +08:00 committed by archer
parent 49cd2d7a3c
commit f90803c558
No known key found for this signature in database
GPG Key ID: 4446499B846D4A9E

View File

@ -12,7 +12,7 @@ import {
DatasetDataWithCollectionType, DatasetDataWithCollectionType,
SearchDataResponseItemType SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type'; } from '@fastgpt/global/core/dataset/type';
import { DatasetColCollectionName, MongoDatasetCollection } from '../collection/schema'; import { MongoDatasetCollection } from '../collection/schema';
import { reRankRecall } from '../../../core/ai/rerank'; import { reRankRecall } from '../../../core/ai/rerank';
import { countPromptTokens } from '../../../common/string/tiktoken/index'; import { countPromptTokens } from '../../../common/string/tiktoken/index';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils'; import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
@ -320,11 +320,13 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const fullTextRecall = async ({ const fullTextRecall = async ({
query, query,
limit, limit,
filterCollectionIdList filterCollectionIdList,
forbidCollectionIdList
}: { }: {
query: string; query: string;
limit: number; limit: number;
filterCollectionIdList?: string[]; filterCollectionIdList?: string[];
forbidCollectionIdList: string[];
}): Promise<{ }): Promise<{
fullTextRecallResults: SearchDataResponseItemType[]; fullTextRecallResults: SearchDataResponseItemType[];
tokenLen: number; tokenLen: number;
@ -351,6 +353,13 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
$in: filterCollectionIdList.map((id) => new Types.ObjectId(id)) $in: filterCollectionIdList.map((id) => new Types.ObjectId(id))
} }
} }
: {}),
...(forbidCollectionIdList && forbidCollectionIdList.length > 0
? {
collectionId: {
$nin: forbidCollectionIdList.map((id) => new Types.ObjectId(id))
}
}
: {}) : {})
} }
}, },
@ -367,31 +376,6 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
{ {
$limit: limit $limit: limit
}, },
{
$lookup: {
from: DatasetColCollectionName,
let: { collectionId: '$collectionId' },
pipeline: [
{
$match: {
$expr: { $eq: ['$_id', '$$collectionId'] },
forbid: { $eq: true } // 匹配被禁用的数据
}
},
{
$project: {
_id: 1 // 只需要_id字段来确认匹配
}
}
],
as: 'collection'
}
},
{
$match: {
collection: { $eq: [] } // 没有 forbid=true 的数据
}
},
{ {
$project: { $project: {
_id: 1, _id: 1,
@ -509,7 +493,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
fullTextRecall({ fullTextRecall({
query, query,
limit: fullTextLimit, limit: fullTextLimit,
filterCollectionIdList filterCollectionIdList,
forbidCollectionIdList
}) })
]); ]);
totalTokens += tokens; totalTokens += tokens;