update vector query time, remove delete file process

duanfuxiang 2025-06-14 13:21:29 +08:00
parent 47de767b50
commit 5c24993ab9
4 changed files with 43 additions and 16 deletions

View File

@@ -53,6 +53,7 @@ export class RAGEngine {
       throw new Error('Embedding model is not set')
     }
     await this.initializeDimension()
+    console.log("updateVaultIndex")
     await this.vectorManager.updateVaultIndex(
       this.embeddingModel,
@@ -69,6 +70,7 @@ export class RAGEngine {
         })
       },
     )
+    console.log("updateVaultIndex done")
     this.initialized = true
   }
@@ -121,9 +123,10 @@ export class RAGEngine {
     await this.initializeDimension()
-    if (!this.initialized) {
-      await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
-    }
+    // if (!this.initialized) {
+    //   console.log("need to updateVaultIndex")
+    //   await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
+    // }
     const queryEmbedding = await this.getEmbedding(query)
     onQueryProgressChange?.({
       type: 'querying',
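Note: with the automatic re-index removed from the query path, the vault index presumably has to be refreshed explicitly before queries run (for example during initialization, which the hunks above now log). A minimal caller-side sketch, assuming RAGEngine is importable and that the query entry point is named processQuery here purely for illustration:

// Hedged sketch: only updateVaultIndex and reindexAll appear in the diff above;
// the processQuery name and progress handling are assumptions for illustration.
async function refreshThenQuery(ragEngine: RAGEngine, query: string) {
  // Refresh the index once, up front, instead of on every query.
  await ragEngine.updateVaultIndex({ reindexAll: false }, (progress) => {
    console.log('index progress', progress)
  })
  // The query itself no longer triggers an index update.
  return ragEngine.processQuery(query)
}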

View File

@@ -85,6 +85,7 @@ export class VectorManager {
     },
     updateProgress?: (indexProgress: IndexProgress) => void,
   ): Promise<void> {
+    console.log("updateVaultIndex start")
     let filesToIndex: TFile[]
     if (options.reindexAll) {
       filesToIndex = await this.getFilesToIndex({
@@ -95,6 +96,7 @@ export class VectorManager {
       })
       await this.repository.clearAllVectors(embeddingModel)
     } else {
+      console.log("updateVaultIndex cleanVectorsForDeletedFiles")
       await this.cleanVectorsForDeletedFiles(embeddingModel)
       filesToIndex = await this.getFilesToIndex({
         embeddingModel: embeddingModel,
@@ -168,13 +170,13 @@ export class VectorManager {
     const embeddingProgress = { completed: 0 }
     // Reduce the batch size to lower memory pressure
-    const insertBatchSize = 16 // lowered from 64 to 16
+    const insertBatchSize = 32
     let batchCount = 0
     try {
       if (embeddingModel.supportsBatch) {
         // Providers that support batching: use the streaming logic
-        const embeddingBatchSize = 16 // lowered from 64 to 16
+        const embeddingBatchSize = 32
         for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
           batchCount++
@ -226,7 +228,7 @@ export class VectorManager {
} }
} else { } else {
// 不支持批量处理的提供商:使用流式处理逻辑 // 不支持批量处理的提供商:使用流式处理逻辑
const limit = pLimit(10) // 从50降低到10减少并发压力 const limit = pLimit(32) // 从50降低到10减少并发压力
const abortController = new AbortController() const abortController = new AbortController()
// 流式处理:分批处理并立即插入 // 流式处理:分批处理并立即插入
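For reference, the two hunks above raise insertBatchSize and embeddingBatchSize from 16 to 32 and the pLimit concurrency cap from 10 to 32. A minimal sketch of the streaming pattern being tuned, embedding chunks under a concurrency cap and inserting each full batch as soon as it accumulates; the embed and insertBatch callbacks are placeholders, not the plugin's actual API:

import pLimit from 'p-limit'

// Hedged sketch of the streaming pattern: embed chunks under a concurrency cap and
// flush buffered rows to the store whenever a full batch has accumulated.
async function embedAndInsert(
  chunks: string[],
  embed: (text: string) => Promise<number[]>,
  insertBatch: (rows: { text: string; embedding: number[] }[]) => Promise<void>,
): Promise<void> {
  const limit = pLimit(32) // concurrency cap, matching the new value above
  const insertBatchSize = 32 // rows buffered before each insert, matching the new value
  const buffer: { text: string; embedding: number[] }[] = []

  await Promise.all(
    chunks.map((text) =>
      limit(async () => {
        buffer.push({ text, embedding: await embed(text) })
        if (buffer.length >= insertBatchSize) {
          await insertBatch(buffer.splice(0, insertBatchSize))
        }
      }),
    ),
  )
  if (buffer.length > 0) {
    await insertBatch(buffer.splice(0)) // flush the remainder
  }
}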
@@ -482,7 +484,9 @@ export class VectorManager {
   private async cleanVectorsForDeletedFiles(
     embeddingModel: EmbeddingModel,
   ) {
+    console.log("cleanVectorsForDeletedFiles start")
     const indexedFilePaths = await this.repository.getAllIndexedFilePaths(embeddingModel)
+    console.log("indexedFilePaths: ", indexedFilePaths)
     const needToDelete = indexedFilePaths.filter(filePath => !this.app.vault.getAbstractFileByPath(filePath))
     if (needToDelete.length > 0) {
       await this.repository.deleteVectorsForMultipleFiles(
@@ -490,6 +494,7 @@ export class VectorManager {
         embeddingModel,
       )
     }
+    console.log("cleanVectorsForDeletedFiles done")
   }
   private async getFilesToIndex({
@@ -502,7 +507,8 @@ export class VectorManager {
     excludePatterns: string[]
     includePatterns: string[]
     reindexAll?: boolean
   }): Promise<TFile[]> {
+    console.log("getFilesToIndex")
     let filesToIndex = this.app.vault.getMarkdownFiles()
     filesToIndex = filesToIndex.filter((file) => {
@@ -518,7 +524,7 @@ export class VectorManager {
     if (reindexAll) {
       return filesToIndex
     }
+    console.log("filesToIndex: ", filesToIndex)
     // Check for updated or new files
     filesToIndex = await Promise.all(
       filesToIndex.map(async (file) => {
@@ -541,6 +547,7 @@ export class VectorManager {
           const outOfDate = file.stat.mtime > fileChunks[0].mtime
           if (outOfDate) {
             // File has changed, so we need to re-index it
+            console.log("File has changed, so we need to re-index it", file.path)
             return file
           }
           return null
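The incremental path above decides whether to re-index a file by comparing its current mtime against the mtime stored with its first indexed chunk. A stripped-down sketch of that check, with the chunk lookup left as a placeholder rather than the plugin's real repository call:

import type { TFile } from 'obsidian'

// Hedged sketch: getChunksForFile stands in for however the stored chunks
// (with the mtime recorded at index time) are actually fetched.
async function needsReindex(
  file: TFile,
  getChunksForFile: (path: string) => Promise<{ mtime: number }[]>,
): Promise<boolean> {
  const fileChunks = await getChunksForFile(file.path)
  if (fileChunks.length === 0) {
    return true // never indexed yet
  }
  return file.stat.mtime > fileChunks[0].mtime // modified since it was indexed
}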

View File

@@ -76,6 +76,22 @@ export const migrations: Record<string, SqlMigration> = {
       CREATE INDEX IF NOT EXISTS "embeddingIndex_384"
       ON "embeddings_384"
       USING hnsw ("embedding" vector_cosine_ops);
+
+      -- Create B-tree indexes for path field to optimize file path queries
+      CREATE INDEX IF NOT EXISTS "pathIndex_1536"
+      ON "embeddings_1536" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_1024"
+      ON "embeddings_1024" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_768"
+      ON "embeddings_768" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_512"
+      ON "embeddings_512" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_384"
+      ON "embeddings_384" ("path");
     `
   },
   template: {
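The added B-tree indexes target statements that filter on "path", such as removing vectors for files that no longer exist in the vault (cleanVectorsForDeletedFiles above). A hedged example of the kind of statement they let the database answer with an index lookup instead of a sequential scan; the specific table and parameter placeholder here are illustrative, not taken from the repository:

// Illustrative only: the table (embeddings_1536) and the $1 parameter style are assumptions.
const deleteVectorsByPathSql = `
  DELETE FROM "embeddings_1536"
  WHERE "path" = ANY($1::text[]);
`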

View File

@@ -465,17 +465,17 @@ export class PromptGenerator {
           this.app
         )
-        // Create a Markdown file for the folder contents
-        const markdownFilePath = await this.createMarkdownFileForContent(
-          `${folder.path}/folder-contents`,
-          content,
-          false
-        )
+        // // Create a Markdown file for the folder contents
+        // const markdownFilePath = await this.createMarkdownFileForContent(
+        //   `${folder.path}/folder-contents`,
+        //   content,
+        //   false
+        // )
         completedFolders++
         folderContents.push(`<folder_content path="${folder.path}">\n${content}\n</folder_content>`)
-        folderContentsForProgress.push({ path: markdownFilePath, content })
-        allFileReadResults.push({ path: markdownFilePath, content })
+        folderContentsForProgress.push({ path: folder.path, content })
+        allFileReadResults.push({ path: folder.path, content })
       }
       // Folder reading complete (if no files needed to be read earlier)
@@ -665,6 +665,7 @@ export class PromptGenerator {
       }
     }
     if (isOverThreshold) {
+      console.log("isOverThreshold", isOverThreshold)
      fileContentsPrompts = files.map((file) => {
        return `<file_content path="${file.path}">\n(Content omitted due to token limit. Relevant sections will be provided by semantic search below.)\n</file_content>`
      }).join('\n')