update vector query time, remove delete file process
commit 5c24993ab9
parent 47de767b50
@@ -53,6 +53,7 @@ export class RAGEngine {
       throw new Error('Embedding model is not set')
     }
     await this.initializeDimension()
+    console.log("updateVaultIndex")

     await this.vectorManager.updateVaultIndex(
       this.embeddingModel,
@@ -69,6 +70,7 @@ export class RAGEngine {
         })
       },
     )
+    console.log("updateVaultIndex done")
     this.initialized = true
   }

@@ -121,9 +123,10 @@ export class RAGEngine {

     await this.initializeDimension()

-    if (!this.initialized) {
-      await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
-    }
+    // if (!this.initialized) {
+    //   console.log("need to updateVaultIndex")
+    //   await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
+    // }
     const queryEmbedding = await this.getEmbedding(query)
     onQueryProgressChange?.({
       type: 'querying',
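In the query path above, the commit comments out the lazy `updateVaultIndex({ reindexAll: false })` call, so a query no longer re-scans the vault (including the deleted-file cleanup) before searching. A minimal sketch of the resulting flow is below; `processQuery` and `searchSimilar` are illustrative names, not necessarily the plugin's real API.

```ts
// Sketch only: illustrative names, not the repository's actual API.
type QueryProgress = { type: 'querying' | 'querying-done' }

class RagQuerySketch {
  constructor(
    private getEmbedding: (query: string) => Promise<number[]>,
    private searchSimilar: (embedding: number[]) => Promise<string[]>,
  ) {}

  // After this commit, the query path only embeds and searches; index
  // maintenance is expected to happen elsewhere (e.g. at indexing time).
  async processQuery(
    query: string,
    onQueryProgressChange?: (p: QueryProgress) => void,
  ): Promise<string[]> {
    const queryEmbedding = await this.getEmbedding(query)
    onQueryProgressChange?.({ type: 'querying' })
    const results = await this.searchSimilar(queryEmbedding)
    onQueryProgressChange?.({ type: 'querying-done' })
    return results
  }
}
```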
@@ -85,6 +85,7 @@ export class VectorManager {
     },
     updateProgress?: (indexProgress: IndexProgress) => void,
   ): Promise<void> {
+    console.log("updateVaultIndex start")
     let filesToIndex: TFile[]
     if (options.reindexAll) {
       filesToIndex = await this.getFilesToIndex({
@@ -95,6 +96,7 @@ export class VectorManager {
       })
       await this.repository.clearAllVectors(embeddingModel)
     } else {
+      console.log("updateVaultIndex cleanVectorsForDeletedFiles")
       await this.cleanVectorsForDeletedFiles(embeddingModel)
       filesToIndex = await this.getFilesToIndex({
         embeddingModel: embeddingModel,
@@ -168,13 +170,13 @@ export class VectorManager {

     const embeddingProgress = { completed: 0 }
     // Reduce the batch size to lower memory pressure
-    const insertBatchSize = 16 // lowered from 64 to 16
+    const insertBatchSize = 32
     let batchCount = 0

     try {
       if (embeddingModel.supportsBatch) {
         // Providers that support batching: use streaming processing
-        const embeddingBatchSize = 16 // lowered from 64 to 16
+        const embeddingBatchSize = 32

         for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
           batchCount++
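Both batch sizes move from 16 to 32. The loop they feed is a stream-and-insert pattern: embed one slice, insert it, then move on, so memory use stays bounded by a single batch. A rough sketch of that shape, with hypothetical `embedBatch`/`insertVectors` callbacks:

```ts
// Sketch only: illustrates the batch-and-insert-immediately loop, with
// hypothetical embedBatch/insertVectors callbacks standing in for the
// provider call and the vector repository.
async function embedInStreamingBatches(
  contentChunks: string[],
  embedBatch: (texts: string[]) => Promise<number[][]>,
  insertVectors: (embeddings: number[][]) => Promise<void>,
  embeddingBatchSize = 32,
): Promise<void> {
  for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
    const batch = contentChunks.slice(i, i + embeddingBatchSize)
    const embeddings = await embedBatch(batch)
    // Insert right away instead of accumulating all embeddings in memory.
    await insertVectors(embeddings)
  }
}
```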
@@ -226,7 +228,7 @@ export class VectorManager {
         }
       } else {
         // Providers without batch support: use streaming processing
-        const limit = pLimit(10) // lowered from 50 to 10 to reduce concurrency pressure
+        const limit = pLimit(32) // lowered from 50 to 10 to reduce concurrency pressure
         const abortController = new AbortController()

         // Streaming: process in batches and insert immediately
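For providers without batch support, the concurrency cap set through `p-limit` rises from 10 to 32. A minimal sketch of that bounded-concurrency pattern, with a hypothetical `embedOne` callback:

```ts
import pLimit from 'p-limit'

// Sketch only: bounds concurrent single-text embedding calls for providers
// without batch support. embedOne is a hypothetical per-chunk embedder.
async function embedWithBoundedConcurrency(
  contentChunks: string[],
  embedOne: (text: string, signal: AbortSignal) => Promise<number[]>,
): Promise<number[][]> {
  const limit = pLimit(32) // at most 32 requests in flight, as in the hunk above
  const abortController = new AbortController()
  try {
    return await Promise.all(
      contentChunks.map((chunk) =>
        limit(() => embedOne(chunk, abortController.signal)),
      ),
    )
  } catch (error) {
    abortController.abort() // cancel the remaining requests on first failure
    throw error
  }
}
```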
@@ -482,7 +484,9 @@ export class VectorManager {
   private async cleanVectorsForDeletedFiles(
     embeddingModel: EmbeddingModel,
   ) {
+    console.log("cleanVectorsForDeletedFiles start")
     const indexedFilePaths = await this.repository.getAllIndexedFilePaths(embeddingModel)
+    console.log("indexedFilePaths: ", indexedFilePaths)
     const needToDelete = indexedFilePaths.filter(filePath => !this.app.vault.getAbstractFileByPath(filePath))
     if (needToDelete.length > 0) {
       await this.repository.deleteVectorsForMultipleFiles(
@@ -490,6 +494,7 @@ export class VectorManager {
         embeddingModel,
       )
     }
+    console.log("cleanVectorsForDeletedFiles done")
   }

   private async getFilesToIndex({
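The instrumented cleanup above reduces to a small routine: list the indexed paths, then drop the ones that no longer resolve to a vault file. A sketch with the repository and vault lookups injected as plain functions (not the class's actual signature):

```ts
// Sketch only: the cleanup shape visible in the hunks above, with the
// repository calls and the vault lookup passed in as plain functions.
async function cleanVectorsForDeletedFilesSketch(
  getAllIndexedFilePaths: () => Promise<string[]>,
  fileExistsInVault: (path: string) => boolean,
  deleteVectorsForMultipleFiles: (paths: string[]) => Promise<void>,
): Promise<void> {
  const indexedFilePaths = await getAllIndexedFilePaths()
  // Any indexed path that no longer resolves to a vault file is stale.
  const needToDelete = indexedFilePaths.filter((path) => !fileExistsInVault(path))
  if (needToDelete.length > 0) {
    await deleteVectorsForMultipleFiles(needToDelete)
  }
}
```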
@@ -503,6 +508,7 @@ export class VectorManager {
     includePatterns: string[]
     reindexAll?: boolean
   }): Promise<TFile[]> {
+    console.log("getFilesToIndex")
     let filesToIndex = this.app.vault.getMarkdownFiles()

     filesToIndex = filesToIndex.filter((file) => {
@@ -518,7 +524,7 @@ export class VectorManager {
     if (reindexAll) {
       return filesToIndex
     }
-
+    console.log("filesToIndex: ", filesToIndex)
     // Check for updated or new files
     filesToIndex = await Promise.all(
       filesToIndex.map(async (file) => {
@@ -541,6 +547,7 @@ export class VectorManager {
         const outOfDate = file.stat.mtime > fileChunks[0].mtime
         if (outOfDate) {
           // File has changed, so we need to re-index it
+          console.log("File has changed, so we need to re-index it", file.path)
           return file
         }
         return null
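The incremental path keeps a file only when its on-disk mtime is newer than the mtime stored with its indexed chunks. A sketch of that selection step, using a hypothetical `getIndexedMtime` lookup in place of the stored chunk metadata:

```ts
// Sketch only: mtime-based selection of files that need re-indexing.
// IndexedFile and getIndexedMtime are hypothetical stand-ins for the vault
// file object and the stored chunk metadata used in the hunk above.
interface IndexedFile {
  path: string
  mtime: number // last-modified time of the file on disk
}

async function selectOutOfDateFiles(
  files: IndexedFile[],
  getIndexedMtime: (path: string) => Promise<number | null>,
): Promise<IndexedFile[]> {
  const checked = await Promise.all(
    files.map(async (file) => {
      const indexedMtime = await getIndexedMtime(file.path)
      if (indexedMtime === null) return file // never indexed: index it
      return file.mtime > indexedMtime ? file : null // changed since last index
    }),
  )
  return checked.filter((file): file is IndexedFile => file !== null)
}
```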
@@ -76,6 +76,22 @@ export const migrations: Record<string, SqlMigration> = {
       CREATE INDEX IF NOT EXISTS "embeddingIndex_384"
       ON "embeddings_384"
       USING hnsw ("embedding" vector_cosine_ops);
+
+      -- Create B-tree indexes for path field to optimize file path queries
+      CREATE INDEX IF NOT EXISTS "pathIndex_1536"
+      ON "embeddings_1536" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_1024"
+      ON "embeddings_1024" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_768"
+      ON "embeddings_768" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_512"
+      ON "embeddings_512" ("path");
+
+      CREATE INDEX IF NOT EXISTS "pathIndex_384"
+      ON "embeddings_384" ("path");
     `
   },
   template: {
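The migration adds plain B-tree indexes on `path` alongside the existing HNSW embedding indexes, so path-equality statements (deleting vectors for specific files, listing indexed paths) no longer scan the whole table. A sketch of the kind of statement that benefits, with `execute` standing in for whatever SQL client the plugin actually uses:

```ts
// Sketch only: path-equality delete that the new B-tree indexes accelerate.
// `execute` is a hypothetical parameterized-query function, and the table
// name is taken from the migration above as an example.
async function deleteVectorsForPaths(
  execute: (sql: string, params: unknown[]) => Promise<void>,
  paths: string[],
): Promise<void> {
  // With "pathIndex_1536" in place this becomes an index lookup per path
  // instead of a sequential scan over the embeddings table.
  await execute(
    `DELETE FROM "embeddings_1536" WHERE "path" = ANY($1)`,
    [paths],
  )
}
```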
@@ -465,17 +465,17 @@ export class PromptGenerator {
           this.app
         )

-        // Create a Markdown file for the folder contents
-        const markdownFilePath = await this.createMarkdownFileForContent(
-          `${folder.path}/folder-contents`,
-          content,
-          false
-        )
+        // // Create a Markdown file for the folder contents
+        // const markdownFilePath = await this.createMarkdownFileForContent(
+        //   `${folder.path}/folder-contents`,
+        //   content,
+        //   false
+        // )

         completedFolders++
         folderContents.push(`<folder_content path="${folder.path}">\n${content}\n</folder_content>`)
-        folderContentsForProgress.push({ path: markdownFilePath, content })
-        allFileReadResults.push({ path: markdownFilePath, content })
+        folderContentsForProgress.push({ path: folder.path, content })
+        allFileReadResults.push({ path: folder.path, content })
       }

       // Folder reading is done (if there were no files to read earlier)
@@ -665,6 +665,7 @@ export class PromptGenerator {
         }
       }
       if (isOverThreshold) {
+        console.log("isOverThreshold", isOverThreshold)
         fileContentsPrompts = files.map((file) => {
           return `<file_content path="${file.path}">\n(Content omitted due to token limit. Relevant sections will be provided by semantic search below.)\n</file_content>`
         }).join('\n')
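The last hunk logs the over-threshold case, in which full file contents are swapped for placeholders and semantic search supplies the relevant sections. A sketch of that fallback, using a hypothetical token estimator in place of the plugin's real one:

```ts
// Sketch only: the over-threshold fallback shown in the last hunk, with a
// hypothetical estimateTokens function and threshold value.
interface FileForPrompt {
  path: string
  content: string
}

function buildFileContentsPrompt(
  files: FileForPrompt[],
  estimateTokens: (text: string) => number,
  tokenThreshold: number,
): string {
  const totalTokens = files.reduce((sum, f) => sum + estimateTokens(f.content), 0)
  if (totalTokens > tokenThreshold) {
    // Full contents would exceed the budget; emit placeholders and let
    // semantic search supply the relevant sections instead.
    return files
      .map(
        (file) =>
          `<file_content path="${file.path}">\n(Content omitted due to token limit. Relevant sections will be provided by semantic search below.)\n</file_content>`,
      )
      .join('\n')
  }
  return files
    .map((file) => `<file_content path="${file.path}">\n${file.content}\n</file_content>`)
    .join('\n')
}
```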