update
This commit is contained in:
parent 3ce55899df
commit b20b4f9e19
@@ -30,6 +30,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: modelInfo.dimensions,
        supportsBatch: true,
        getEmbedding: async (text: string) => {
          try {
            if (!openai.apiKey) {
@@ -54,6 +55,31 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          console.log("use getBatchEmbeddings", texts.length)
          try {
            if (!openai.apiKey) {
              throw new LLMAPIKeyNotSetException(
                'OpenAI API key is missing. Please set it in settings menu.',
              )
            }
            const embedding = await openai.embeddings.create({
              model: settings.embeddingModelId,
              input: texts,
            })
            return embedding.data.map(item => item.embedding)
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.toLowerCase().includes('rate limit')
            ) {
              throw new LLMRateLimitExceededException(
                'OpenAI API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    case ApiProvider.OpenAI: {
@@ -67,6 +93,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: modelInfo.dimensions,
        supportsBatch: true,
        getEmbedding: async (text: string) => {
          try {
            if (!openai.apiKey) {
@@ -91,6 +118,30 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          try {
            if (!openai.apiKey) {
              throw new LLMAPIKeyNotSetException(
                'OpenAI API key is missing. Please set it in settings menu.',
              )
            }
            const embedding = await openai.embeddings.create({
              model: settings.embeddingModelId,
              input: texts,
            })
            return embedding.data.map(item => item.embedding)
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.toLowerCase().includes('rate limit')
            ) {
              throw new LLMRateLimitExceededException(
                'OpenAI API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    case ApiProvider.SiliconFlow: {
@@ -104,6 +155,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: modelInfo.dimensions,
        supportsBatch: true,
        getEmbedding: async (text: string) => {
          try {
            if (!openai.apiKey) {
@@ -128,6 +180,30 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          try {
            if (!openai.apiKey) {
              throw new LLMAPIKeyNotSetException(
                'SiliconFlow API key is missing. Please set it in settings menu.',
              )
            }
            const embedding = await openai.embeddings.create({
              model: settings.embeddingModelId,
              input: texts,
            })
            return embedding.data.map(item => item.embedding)
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.toLowerCase().includes('rate limit')
            ) {
              throw new LLMRateLimitExceededException(
                'SiliconFlow API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    case ApiProvider.AlibabaQwen: {
@@ -141,6 +217,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: modelInfo.dimensions,
        supportsBatch: false,
        getEmbedding: async (text: string) => {
          try {
            if (!openai.apiKey) {
@@ -165,6 +242,30 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          try {
            if (!openai.apiKey) {
              throw new LLMAPIKeyNotSetException(
                'Alibaba Qwen API key is missing. Please set it in settings menu.',
              )
            }
            const embedding = await openai.embeddings.create({
              model: settings.embeddingModelId,
              input: texts,
            })
            return embedding.data.map(item => item.embedding)
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.toLowerCase().includes('rate limit')
            ) {
              throw new LLMRateLimitExceededException(
                'Alibaba Qwen API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    case ApiProvider.Google: {
@@ -174,6 +275,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: modelInfo.dimensions,
        supportsBatch: false,
        getEmbedding: async (text: string) => {
          try {
            const response = await model.embedContent(text)
@@ -190,6 +292,27 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          try {
            const embeddings = await Promise.all(
              texts.map(async (text) => {
                const response = await model.embedContent(text)
                return response.embedding.values
              })
            )
            return embeddings
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.includes('RATE_LIMIT_EXCEEDED')
            ) {
              throw new LLMRateLimitExceededException(
                'Gemini API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    case ApiProvider.Ollama: {
@@ -201,6 +324,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: 0,
        supportsBatch: false,
        getEmbedding: async (text: string) => {
          if (!settings.ollamaProvider.baseUrl) {
            throw new LLMBaseUrlNotSetException(
@@ -213,6 +337,18 @@ export const getEmbeddingModel = (
          })
          return embedding.data[0].embedding
        },
        getBatchEmbeddings: async (texts: string[]) => {
          if (!settings.ollamaProvider.baseUrl) {
            throw new LLMBaseUrlNotSetException(
              'Ollama Address is missing. Please set it in settings menu.',
            )
          }
          const embedding = await openai.embeddings.create({
            model: settings.embeddingModelId,
            input: texts,
          })
          return embedding.data.map(item => item.embedding)
        },
      }
    }
    case ApiProvider.OpenAICompatible: {
@@ -224,6 +360,7 @@ export const getEmbeddingModel = (
      return {
        id: settings.embeddingModelId,
        dimension: 0,
        supportsBatch: false,
        getEmbedding: async (text: string) => {
          try {
            if (!openai.apiKey) {
@@ -249,6 +386,31 @@ export const getEmbeddingModel = (
            throw error
          }
        },
        getBatchEmbeddings: async (texts: string[]) => {
          try {
            if (!openai.apiKey) {
              throw new LLMAPIKeyNotSetException(
                'OpenAI Compatible API key is missing. Please set it in settings menu.',
              )
            }
            const embedding = await openai.embeddings.create({
              model: settings.embeddingModelId,
              input: texts,
              encoding_format: "float",
            })
            return embedding.data.map(item => item.embedding)
          } catch (error) {
            if (
              error.status === 429 &&
              error.message.toLowerCase().includes('rate limit')
            ) {
              throw new LLMRateLimitExceededException(
                'OpenAI Compatible API rate limit exceeded. Please try again later.',
              )
            }
            throw error
          }
        },
      }
    }
    default:
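Aside: the batch variants above rely on the OpenAI-style embeddings endpoint accepting an array of inputs in a single request, with the response data preserving input order. A minimal standalone sketch of that call pattern, assuming the official openai npm package (the model name is only an example):

import OpenAI from 'openai'

// One request embeds many inputs; res.data[i] corresponds to input[i].
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
const res = await client.embeddings.create({
  model: 'text-embedding-3-small',
  input: ['first text', 'second text'],
})
const vectors = res.data.map((item) => item.embedding)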
@@ -131,7 +131,50 @@ export class VectorManager {

    const embeddingProgress = { completed: 0 }
    const embeddingChunks: InsertVector[] = []
    const batchSize = 100
    const insertBatchSize = 100 // database insert batch size

    try {
      if (embeddingModel.supportsBatch) {
        // Providers that support batching: use the batch-processing path
        const embeddingBatchSize = 100 // API batch size

        for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
          const batchChunks = contentChunks.slice(i, Math.min(i + embeddingBatchSize, contentChunks.length))
          const batchTexts = batchChunks.map(chunk => chunk.content)

          await backOff(
            async () => {
              const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

              // merge the embedding results into the chunk data
              for (let j = 0; j < batchChunks.length; j++) {
                const embeddedChunk: InsertVector = {
                  path: batchChunks[j].path,
                  mtime: batchChunks[j].mtime,
                  content: batchChunks[j].content,
                  embedding: batchEmbeddings[j],
                  metadata: batchChunks[j].metadata,
                }
                embeddingChunks.push(embeddedChunk)
              }

              embeddingProgress.completed += batchChunks.length
              updateProgress?.({
                completedChunks: embeddingProgress.completed,
                totalChunks: contentChunks.length,
                totalFiles: filesToIndex.length,
              })
            },
            {
              numOfAttempts: 5,
              startingDelay: 1000,
              timeMultiple: 1.5,
              jitter: 'full',
            },
          )
        }
      } else {
        // Providers without batch support: keep the original per-chunk path
        const limit = pLimit(50)
        const abortController = new AbortController()
        const tasks = contentChunks.map((chunk) =>
@@ -172,8 +215,8 @@ export class VectorManager {
        }),
      )

      try {
        await Promise.all(tasks)
      }

      // all embedding generated, batch insert
      if (embeddingChunks.length > 0) {
@@ -182,7 +225,7 @@ export class VectorManager {
        while (inserted < embeddingChunks.length) {
          const chunksToInsert = embeddingChunks.slice(
            inserted,
-           Math.min(inserted + batchSize, embeddingChunks.length)
+           Math.min(inserted + insertBatchSize, embeddingChunks.length)
          )
          await this.repository.insertVectors(chunksToInsert, embeddingModel)
          inserted += chunksToInsert.length
@@ -242,6 +285,43 @@ export class VectorManager {
    })

    const embeddingChunks: InsertVector[] = []
    const insertBatchSize = 100 // database insert batch size

    try {
      if (embeddingModel.supportsBatch) {
        // Providers that support batching: use the batch-processing path
        const embeddingBatchSize = 100 // API batch size

        for (let i = 0; i < contentChunks.length; i += embeddingBatchSize) {
          const batchChunks = contentChunks.slice(i, Math.min(i + embeddingBatchSize, contentChunks.length))
          const batchTexts = batchChunks.map(chunk => chunk.content)

          await backOff(
            async () => {
              const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

              // merge the embedding results into the chunk data
              for (let j = 0; j < batchChunks.length; j++) {
                const embeddedChunk: InsertVector = {
                  path: batchChunks[j].path,
                  mtime: batchChunks[j].mtime,
                  content: batchChunks[j].content,
                  embedding: batchEmbeddings[j],
                  metadata: batchChunks[j].metadata,
                }
                embeddingChunks.push(embeddedChunk)
              }
            },
            {
              numOfAttempts: 5,
              startingDelay: 1000,
              timeMultiple: 1.5,
              jitter: 'full',
            },
          )
        }
      } else {
        // Providers without batch support: keep the original per-chunk path
        const limit = pLimit(50)
        const abortController = new AbortController()
        const tasks = contentChunks.map((chunk) =>
@@ -276,15 +356,14 @@ export class VectorManager {
        }),
      )

      try {
        await Promise.all(tasks)
      }

      // all embedding generated, batch insert
      if (embeddingChunks.length > 0) {
        const batchSize = 100
        let inserted = 0
        while (inserted < embeddingChunks.length) {
-         const chunksToInsert = embeddingChunks.slice(inserted, Math.min(inserted + batchSize, embeddingChunks.length))
+         const chunksToInsert = embeddingChunks.slice(inserted, Math.min(inserted + insertBatchSize, embeddingChunks.length))
          await this.repository.insertVectors(chunksToInsert, embeddingModel)
          inserted += chunksToInsert.length
        }

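Aside: with the retry options used above (numOfAttempts: 5, startingDelay: 1000, timeMultiple: 1.5, jitter: 'full'), the base delays between the five attempts grow roughly as 1000, 1500, 2250 and 3375 ms, and full jitter draws each actual delay uniformly from [0, baseDelay]. A minimal sketch of the same call shape, assuming backOff comes from the exponential-backoff npm package:

import { backOff } from 'exponential-backoff'

// Retry a flaky batch-embedding call with the same policy as above.
const embeddings = await backOff(
  () => embeddingModel.getBatchEmbeddings(batchTexts),
  { numOfAttempts: 5, startingDelay: 1000, timeMultiple: 1.5, jitter: 'full' },
)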
@@ -17,5 +17,7 @@ export type EmbeddingModelOption = {
export type EmbeddingModel = {
  id: string
  dimension: number
  supportsBatch: boolean
  getEmbedding: (text: string) => Promise<number[]>
  getBatchEmbeddings: (texts: string[]) => Promise<number[][]>
}
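A caller-side sketch of how this interface composes (a hypothetical helper, not part of this commit): dispatch on supportsBatch and fall back to per-text calls otherwise.

// Hypothetical helper: embed many texts with whichever path the model supports.
async function embedAll(
  model: EmbeddingModel,
  texts: string[],
): Promise<number[][]> {
  if (model.supportsBatch) {
    return model.getBatchEmbeddings(texts)
  }
  // Fall back to one request per text for providers without batch support.
  return Promise.all(texts.map((text) => model.getEmbedding(text)))
}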