diff --git a/src/core/rag/rag-engine.ts b/src/core/rag/rag-engine.ts
index 292f684..e03c42e 100644
--- a/src/core/rag/rag-engine.ts
+++ b/src/core/rag/rag-engine.ts
@@ -88,6 +88,7 @@ export class RAGEngine {
 			this.embeddingModel,
 			{
 				chunkSize: this.settings.ragOptions.chunkSize,
+				batchSize: this.settings.ragOptions.batchSize,
 				excludePatterns: this.settings.ragOptions.excludePatterns,
 				includePatterns: this.settings.ragOptions.includePatterns,
 				reindexAll: options.reindexAll,
@@ -112,6 +113,7 @@ export class RAGEngine {
 		await this.vectorManager.UpdateFileVectorIndex(
 			this.embeddingModel,
 			this.settings.ragOptions.chunkSize,
+			this.settings.ragOptions.batchSize,
 			file,
 		)
 	}
diff --git a/src/database/modules/vector/vector-manager.ts b/src/database/modules/vector/vector-manager.ts
index 172db6a..12b567e 100644
--- a/src/database/modules/vector/vector-manager.ts
+++ b/src/database/modules/vector/vector-manager.ts
@@ -27,7 +27,7 @@ export class VectorManager {
 	constructor(app: App, dbManager: DBManager) {
 		this.app = app
 		this.dbManager = dbManager
-		this.repository = new VectorRepository(app, dbManager.getPgClient() as any)
+		this.repository = new VectorRepository(app, dbManager.getPgClient())
 	}

 	async performSimilaritySearch(
@@ -59,7 +59,7 @@ export class VectorManager {
 			if (typeof global !== 'undefined' && global.gc) {
 				global.gc()
 			} else if (typeof window !== 'undefined' && (window as any).gc) {
-				(window as any).gc()
+				((window as any).gc as () => void)();
 			}
 		} catch (e) {
 			// 忽略垃圾回收错误
 		}
@@ -80,6 +80,7 @@ export class VectorManager {
 		embeddingModel: EmbeddingModel,
 		options: {
 			chunkSize: number
+			batchSize: number
 			excludePatterns: string[]
 			includePatterns: string[]
 			reindexAll?: boolean
@@ -194,17 +195,15 @@ export class VectorManager {
 		const embeddingProgress = { completed: 0 }

 		// 减少批量大小以降低内存压力
-		const insertBatchSize = 32
+		const batchSize = options.batchSize
 		let batchCount = 0

 		try {
 			if (embeddingModel.supportsBatch) {
 				// 支持批量处理的提供商:使用流式处理逻辑
-				const embeddingBatchSize = 32
-
-				for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
+				for (let i = 0; i < contentChunks.length; i += batchSize) {
 					batchCount++
-					const batchChunks = contentChunks.slice(i, Math.min(i + embeddingBatchSize, contentChunks.length))
+					const batchChunks = contentChunks.slice(i, Math.min(i + batchSize, contentChunks.length))

 					const embeddedBatch: InsertVector[] = []
@@ -267,13 +266,13 @@ export class VectorManager {
 			const abortController = new AbortController()

 			// 流式处理:分批处理并立即插入
-			for (let i = 0; i < contentChunks.length; i += insertBatchSize) {
+			for (let i = 0; i < contentChunks.length; i += batchSize) {
 				if (abortController.signal.aborted) {
 					throw new Error('Operation was aborted')
 				}

 				batchCount++
-				const batchChunks = contentChunks.slice(i, Math.min(i + insertBatchSize, contentChunks.length))
+				const batchChunks = contentChunks.slice(i, Math.min(i + batchSize, contentChunks.length))

 				const embeddedBatch: InsertVector[] = []
 				const tasks = batchChunks.map((chunk) =>
@@ -357,6 +356,7 @@ export class VectorManager {
 	async UpdateFileVectorIndex(
 		embeddingModel: EmbeddingModel,
 		chunkSize: number,
+		batchSize: number,
 		file: TFile
 	) {
 		try {
@@ -412,19 +412,15 @@ export class VectorManager {
 				})
 				.filter((chunk): chunk is InsertVector => chunk !== null)

-			// 减少批量大小以降低内存压力
-			const insertBatchSize = 16 // 从64降低到16
 			let batchCount = 0

 			try {
 				if (embeddingModel.supportsBatch) {
 					// 支持批量处理的提供商:使用流式处理逻辑
-					const embeddingBatchSize = 16 // 从64降低到16
-
-					for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
+					for (let i = 0; i < contentChunks.length; i += batchSize) {
 						batchCount++
-						console.log(`Embedding batch ${batchCount} of ${Math.ceil(contentChunks.length / embeddingBatchSize)}`)
-						const batchChunks = contentChunks.slice(i, Math.min(i + embeddingBatchSize, contentChunks.length))
+						console.log(`Embedding batch ${batchCount} of ${Math.ceil(contentChunks.length / batchSize)}`)
+						const batchChunks = contentChunks.slice(i, Math.min(i + batchSize, contentChunks.length))

 						const embeddedBatch: InsertVector[] = []
@@ -480,13 +476,13 @@ export class VectorManager {
 				const abortController = new AbortController()

 				// 流式处理:分批处理并立即插入
-				for (let i = 0; i < contentChunks.length; i += insertBatchSize) {
+				for (let i = 0; i < contentChunks.length; i += batchSize) {
 					if (abortController.signal.aborted) {
 						throw new Error('Operation was aborted')
 					}

 					batchCount++
-					const batchChunks = contentChunks.slice(i, Math.min(i + insertBatchSize, contentChunks.length))
+					const batchChunks = contentChunks.slice(i, Math.min(i + batchSize, contentChunks.length))

 					const embeddedBatch: InsertVector[] = []
 					const tasks = batchChunks.map((chunk) =>
diff --git a/src/lang/locale/en.ts b/src/lang/locale/en.ts
index f0b0b8b..3c5eec1 100644
--- a/src/lang/locale/en.ts
+++ b/src/lang/locale/en.ts
@@ -358,6 +358,8 @@ export default {
 	excludePatternsDescription: 'Files matching ANY of these patterns will be excluded from indexing. One pattern per line. Uses glob patterns (e.g., "private/*", "*.tmp"). Leave empty to exclude nothing. After changing this, use the command "Rebuild entire vault index" to apply changes.',
 	chunkSize: 'Chunk size',
 	chunkSizeDescription: 'Set the chunk size for text splitting. After changing this, please re-index the vault using the "Rebuild entire vault index" command.',
+	batchSize: 'Batch size',
+	batchSizeDescription: 'Set the batch size for embedding. A smaller value may reduce memory usage. After changing this, please re-index the vault using the "Rebuild entire vault index" command.',
 	thresholdTokens: 'Threshold tokens',
 	thresholdTokensDescription: 'Maximum number of tokens before switching to RAG. If the total tokens from mentioned files exceed this, RAG will be used instead of including all file contents.',
 	minSimilarity: 'Minimum similarity',
diff --git a/src/lang/locale/zh-cn.ts b/src/lang/locale/zh-cn.ts
index 8f0b98c..4c26617 100644
--- a/src/lang/locale/zh-cn.ts
+++ b/src/lang/locale/zh-cn.ts
@@ -360,6 +360,8 @@ export default {
 	excludePatternsDescription: '匹配任何这些模式的文件将从索引中排除。每行一个模式。使用 glob 模式(例如,"private/*", "*.tmp")。留空以不排除任何内容。更改后,请使用命令 "重建整个 Vault 索引" 来应用更改。',
 	chunkSize: '分块大小',
 	chunkSizeDescription: '设置文本分割的分块大小。更改后,请使用 "重建整个 Vault 索引" 命令重新索引 Vault。',
+	batchSize: '批处理大小',
+	batchSizeDescription: '设置嵌入的批处理大小。较小的值可以减少内存使用。更改后,请使用 "重建整个 Vault 索引" 命令重新索引 Vault。',
 	thresholdTokens: '阈值 Tokens',
 	thresholdTokensDescription: '切换到 RAG 之前的最大 Tokens 数。如果提及文件的总 Tokens 超过此值,将使用 RAG 而不是包含所有文件内容。',
 	minSimilarity: '最小相似度',
diff --git a/src/settings/SettingTab.tsx b/src/settings/SettingTab.tsx
index e9af710..194096f 100644
--- a/src/settings/SettingTab.tsx
+++ b/src/settings/SettingTab.tsx
@@ -468,6 +468,29 @@ export class InfioSettingTab extends PluginSettingTab {
 				}),
 			)

+		new Setting(contentContainer)
+			.setName(t('settings.RAG.batchSize'))
+			.setDesc(
+				t('settings.RAG.batchSizeDescription'),
+			)
+			.addText((text) =>
+				text
+					.setPlaceholder('32')
+					.setValue(String(this.plugin.settings.ragOptions.batchSize))
+					.onChange(async (value) => {
+						const batchSize = parseInt(value, 10)
+						if (!isNaN(batchSize)) {
+							await this.plugin.setSettings({
+								...this.plugin.settings,
+								ragOptions: {
+									...this.plugin.settings.ragOptions,
+									batchSize,
+								},
+							})
+						}
+					}),
+			)
+
 		new Setting(contentContainer)
 			.setName(t('settings.RAG.thresholdTokens'))
 			.setDesc(
diff --git a/src/settings/versions/shared.ts b/src/settings/versions/shared.ts
index e2f539c..f0e586f 100644
--- a/src/settings/versions/shared.ts
+++ b/src/settings/versions/shared.ts
@@ -4,7 +4,7 @@ export const MIN_DELAY = 0;
 export const MAX_DELAY = 2000;
 export const MIN_MAX_CHAR_LIMIT = 100;
 export const MAX_MAX_CHAR_LIMIT = 10000;
-export const MIN_MAX_TOKENS = 128;
+export const MIN_MAX_TOKENS = 4096;
 export const MAX_MAX_TOKENS = 8192;
 export const MIN_TEMPERATURE = 0.0;
 export const MAX_TEMPERATURE = 1.0;
diff --git a/src/types/settings.test.ts b/src/types/settings.test.ts
index f4e25ac..a768869 100644
--- a/src/types/settings.test.ts
+++ b/src/types/settings.test.ts
@@ -16,7 +16,7 @@ describe('parseSmartCopilotSettings', () => {
 				top_p: 0.1,
 				frequency_penalty: 0.25,
 				presence_penalty: 0,
-				max_tokens: 800,
+				max_tokens: 4096,
 			},
 			systemMessage: DEFAULT_SETTINGS.systemMessage,
 			fewShotExamples: DEFAULT_SETTINGS.fewShotExamples,
@@ -34,6 +34,7 @@ describe('parseSmartCopilotSettings', () => {
 		})
 		expect(result).toEqual({
 			version: 0.4,
+			workspace: '',
 			activeModels: DEFAULT_MODELS,
 			activeProviderTab: 'Infio',
 			infioApiKey: '',
@@ -66,6 +67,13 @@ describe('parseSmartCopilotSettings', () => {
 				useCustomUrl: false,
 				models: [],
 			},
+			localproviderProvider: {
+				name: 'LocalProvider',
+				apiKey: '',
+				baseUrl: '',
+				useCustomUrl: false,
+				models: [],
+			},
 			anthropicProvider: {
 				name: 'Anthropic',
 				apiKey: '',
@@ -97,7 +105,8 @@ describe('parseSmartCopilotSettings', () => {
 			},
 			systemPrompt: '',
 			ragOptions: {
-				chunkSize: 1000,
+				batchSize: 32,
+				chunkSize: 500,
 				thresholdTokens: 8192,
 				minSimilarity: 0.0,
 				limit: 10,
@@ -118,7 +127,7 @@ describe('parseSmartCopilotSettings', () => {
 				top_p: 0.1,
 				frequency_penalty: 0.25,
 				presence_penalty: 0,
-				max_tokens: 800,
+				max_tokens: 4096,
 			},
 			systemMessage: DEFAULT_SETTINGS.systemMessage,
 			fewShotExamples: DEFAULT_SETTINGS.fewShotExamples,
@@ -224,7 +233,8 @@ describe('settings migration', () => {
 			embeddingModel: 'text-embedding-3-small',
 			systemPrompt: 'system prompt',
 			ragOptions: {
-				chunkSize: 1000,
+				batchSize: 32,
+				chunkSize: 500,
 				thresholdTokens: 8192,
 				minSimilarity: 0.0,
 				limit: 10,
@@ -239,7 +249,7 @@ describe('settings migration', () => {
 				top_p: 0.1,
 				frequency_penalty: 0.25,
 				presence_penalty: 0,
-				max_tokens: 800,
+				max_tokens: 4096,
 			},
 			systemMessage: DEFAULT_SETTINGS.systemMessage,
 			fewShotExamples: DEFAULT_SETTINGS.fewShotExamples,
@@ -259,6 +269,7 @@ describe('settings migration', () => {
 		const result = parseInfioSettings(oldSettings)
 		expect(result).toEqual({
 			version: 0.4,
+			workspace: '',
 			activeModels: DEFAULT_MODELS,
 			activeProviderTab: 'Infio',
 			infioApiKey: '',
@@ -291,6 +302,13 @@ describe('settings migration', () => {
 				useCustomUrl: false,
 				models: [],
 			},
+			localproviderProvider: {
+				name: 'LocalProvider',
+				apiKey: '',
+				baseUrl: '',
+				useCustomUrl: false,
+				models: [],
+			},
 			anthropicProvider: {
 				name: 'Anthropic',
 				apiKey: '',
@@ -322,7 +340,8 @@ describe('settings migration', () => {
 			},
 			systemPrompt: 'system prompt',
 			ragOptions: {
-				chunkSize: 1000,
+				batchSize: 32,
+				chunkSize: 500,
 				thresholdTokens: 8192,
 				minSimilarity: 0.0,
 				limit: 10,
@@ -343,7 +362,7 @@ describe('settings migration', () => {
 				top_p: 0.1,
 				frequency_penalty: 0.25,
 				presence_penalty: 0,
-				max_tokens: 800,
+				max_tokens: 4096,
 			},
 			systemMessage: DEFAULT_SETTINGS.systemMessage,
 			fewShotExamples: DEFAULT_SETTINGS.fewShotExamples,
diff --git a/src/types/settings.ts b/src/types/settings.ts
index 7227281..cd1bb86 100644
--- a/src/types/settings.ts
+++ b/src/types/settings.ts
@@ -210,6 +210,7 @@ const openAICompatibleModelSchema = z.object({

 const ragOptionsSchema = z.object({
 	chunkSize: z.number().catch(1000),
+	batchSize: z.number().catch(32),
 	thresholdTokens: z.number().catch(8192),
 	minSimilarity: z.number().catch(0.0),
 	limit: z.number().catch(10),
@@ -367,7 +368,8 @@ export const InfioSettingsSchema = z.object({

 	// RAG Options
 	ragOptions: ragOptionsSchema.catch({
-		chunkSize: 1000,
+		batchSize: 32,
+		chunkSize: 500,
 		thresholdTokens: 8192,
 		minSimilarity: 0.0,
 		limit: 10,
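
For reference, a minimal sketch (not part of the patch) of the batching pattern that the new `ragOptions.batchSize` setting parameterizes: content chunks are sliced into fixed-size batches, each batch is embedded, and the results are inserted immediately so only one batch of embeddings is held in memory at a time. `embedText` and `insertVectors` are hypothetical stand-ins, not APIs from this codebase.

// Illustrative sketch only, assuming generic embed/insert callbacks.
type Chunk = { path: string; content: string }

async function embedInBatches(
	chunks: Chunk[],
	batchSize: number,
	embedText: (texts: string[]) => Promise<number[][]>,
	insertVectors: (rows: { path: string; embedding: number[] }[]) => Promise<void>,
): Promise<void> {
	for (let i = 0; i < chunks.length; i += batchSize) {
		// Take at most batchSize chunks so a single batch bounds memory use.
		const batch = chunks.slice(i, Math.min(i + batchSize, chunks.length))
		const embeddings = await embedText(batch.map((c) => c.content))
		// Insert right away instead of accumulating all embeddings first.
		await insertVectors(
			batch.map((c, j) => ({ path: c.path, embedding: embeddings[j] })),
		)
	}
}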