From 4f5b3f5d04f3edae769c9c34ace9bf1d9db94446 Mon Sep 17 00:00:00 2001 From: duanfuxiang Date: Mon, 30 Jun 2025 11:26:24 +0800 Subject: [PATCH] update save trans to database --- src/ChatView.tsx | 5 +- src/components/chat-view/ChatView.tsx | 36 +- src/contexts/TransContext.tsx | 39 + src/core/prompts/tools/call-insights.ts | 6 +- src/core/prompts/tools/index.ts | 10 +- src/core/transformations/run_trans.ts | 389 ---------- src/core/transformations/trans-engine.ts | 683 ++++++++++++++++++ src/core/transformations/usage-example.ts | 181 ----- .../modules/insight/insight-manager.ts | 32 +- .../modules/insight/insight-repository.ts | 49 +- src/database/schema.ts | 3 +- src/database/sql.ts | 16 + src/main.ts | 25 + src/utils/parse-infio-block.ts | 2 +- 14 files changed, 869 insertions(+), 607 deletions(-) create mode 100644 src/contexts/TransContext.tsx delete mode 100644 src/core/transformations/run_trans.ts create mode 100644 src/core/transformations/trans-engine.ts delete mode 100644 src/core/transformations/usage-example.ts diff --git a/src/ChatView.tsx b/src/ChatView.tsx index 44c5dc2..a7978dc 100644 --- a/src/ChatView.tsx +++ b/src/ChatView.tsx @@ -15,6 +15,7 @@ import { LLMProvider } from './contexts/LLMContext' import { McpHubProvider } from './contexts/McpHubContext' import { RAGProvider } from './contexts/RAGContext' import { SettingsProvider } from './contexts/SettingsContext' +import { TransProvider } from './contexts/TransContext' import InfioPlugin from './main' import { MentionableBlockData } from './types/mentionable' import { InfioSettings } from './types/settings' @@ -96,7 +97,8 @@ export class ChatView extends ItemView { > this.plugin.getRAGEngine()}> - + this.plugin.getTransEngine()}> + this.plugin.getMcpHub()}> @@ -109,6 +111,7 @@ export class ChatView extends ItemView { + diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index da3ffb7..9aa3e1f 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -24,6 +24,7 @@ import { useLLM } from '../../contexts/LLMContext' import { useMcpHub } from '../../contexts/McpHubContext' import { useRAG } from '../../contexts/RAGContext' import { useSettings } from '../../contexts/SettingsContext' +import { useTrans } from '../../contexts/TransContext' import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match' import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match' import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex' @@ -34,7 +35,7 @@ import { LLMBaseUrlNotSetException, LLMModelNotSetException, } from '../../core/llm/exception' -import { TransformationType, runTransformation } from '../../core/transformations/run_trans' +import { TransformationType } from '../../core/transformations/trans-engine' import { useChatHistory } from '../../hooks/use-chat-history' import { useCustomModes } from '../../hooks/use-custom-mode' import { t } from '../../lang/helpers' @@ -118,6 +119,7 @@ const Chat = forwardRef((props, ref) => { const app = useApp() const { settings, setSettings } = useSettings() const { getRAGEngine } = useRAG() + const { getTransEngine } = useTrans() const diffStrategy = useDiffStrategy() const dataviewManager = useDataview() const { getMcpHub } = useMcpHub() @@ -832,30 +834,24 @@ const Chat = forwardRef((props, ref) => { } else if (toolArgs.type === 'call_transformations') { // Handling for the unified transformations tool try { - const targetFile = app.vault.getFileByPath(toolArgs.path); - if (!targetFile) { - throw new Error(`File not found: ${toolArgs.path}`); - } - - const fileContent = await readTFileContentPdf(targetFile, app.vault, app); - - // The transformation type is now passed directly in the arguments - const transformationType = toolArgs.transformation as TransformationType; - + console.log("call_transformations", toolArgs) // Validate that the transformation type is a valid enum member - if (!Object.values(TransformationType).includes(transformationType)) { - throw new Error(`Unsupported transformation type: ${transformationType}`); + if (!Object.values(TransformationType).includes(toolArgs.transformation as TransformationType)) { + throw new Error(`Unsupported transformation type: ${toolArgs.transformation}`); } - // Execute the transformation - const transformationResult = await runTransformation({ - content: fileContent, - transformationType, - settings, + const transformationType = toolArgs.transformation as TransformationType; + const transEngine = await getTransEngine(); + + // Execute the transformation using the TransEngine + const transformationResult = await transEngine.runTransformation({ + filePath: toolArgs.path, + transformationType: transformationType, model: { provider: settings.applyModelProvider, modelId: settings.applyModelId, - } + }, + saveToDatabase: true }); if (!transformationResult.success) { @@ -863,7 +859,7 @@ const Chat = forwardRef((props, ref) => { } // Build the result message - let formattedContent = `[${transformationType}] transformation complete:\n\n${transformationResult.result}`; + let formattedContent = `[${toolArgs.transformation}] transformation complete:\n\n${transformationResult.result}`; if (transformationResult.truncated) { formattedContent += `\n\n*Note: The original content was too long (${transformationResult.originalTokens} tokens) and was truncated to ${transformationResult.processedTokens} tokens for processing.*`; diff --git a/src/contexts/TransContext.tsx b/src/contexts/TransContext.tsx new file mode 100644 index 0000000..ff85d38 --- /dev/null +++ b/src/contexts/TransContext.tsx @@ -0,0 +1,39 @@ +import { + PropsWithChildren, + createContext, + useContext, + useEffect, + useMemo, +} from 'react' + +import { TransEngine } from '../core/transformations/trans-engine' + +export type TransContextType = { + getTransEngine: () => Promise +} + +const TransContext = createContext(null) + +export function TransProvider({ + getTransEngine, + children, +}: PropsWithChildren<{ getTransEngine: () => Promise }>) { + useEffect(() => { + // start initialization of transEngine in the background + void getTransEngine() + }, [getTransEngine]) + + const value = useMemo(() => { + return { getTransEngine } + }, [getTransEngine]) + + return {children} +} + +export function useTrans() { + const context = useContext(TransContext) + if (!context) { + throw new Error('useTrans must be used within a TransProvider') + } + return context +} diff --git a/src/core/prompts/tools/call-insights.ts b/src/core/prompts/tools/call-insights.ts index 1c6138d..1aa9b82 100644 --- a/src/core/prompts/tools/call-insights.ts +++ b/src/core/prompts/tools/call-insights.ts @@ -2,7 +2,7 @@ import { ToolArgs } from "./types" export function getCallInsightsDescription(args: ToolArgs): string { return `## insights -Description: Use for **Information Processing**. After reading a note's content, use this tool to process and distill the information in various ways. You must choose the most appropriate transformation type based on your goal. +Description: Use for **Knowledge Synthesis and Retrieval**. This is your primary tool for "asking questions" to a document or a set of documents. Use it to query your notes and extract higher-level insights, summaries, and other conceptual abstractions. Instead of just finding raw text, this tool helps you understand and synthesize the information within your vault. Parameters: - path: (required) The path to the file or folder to be processed (relative to the current working directory: ${args.cwd}). - transformation: (required) The type of transformation to apply. Must be one of the following: @@ -15,12 +15,12 @@ Parameters: Usage: path/to/your/file.md -simple_summary +simple_summary Example: Getting the key insights from a project note Projects/Project_Alpha_Retrospective.md -key_insights +key_insights ` } diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index 5443067..6ee4613 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -54,9 +54,9 @@ export function getToolDescriptionsForMode( customModes?: ModeConfig[], experiments?: Record, ): string { - console.log("getToolDescriptionsForMode", mode, customModes) + // console.log("getToolDescriptionsForMode", mode, customModes) const config = getModeConfig(mode, customModes) - console.log("config", config) + // console.log("config", config) const args: ToolArgs = { cwd, searchSettings, @@ -73,7 +73,7 @@ export function getToolDescriptionsForMode( config.groups.forEach((groupEntry) => { const groupName = getGroupName(groupEntry) const toolGroup = TOOL_GROUPS[groupName] - console.log("toolGroup", toolGroup) + // console.log("toolGroup", toolGroup) if (toolGroup) { toolGroup.tools.forEach((tool) => { if (isToolAllowedForMode(tool, mode, customModes ?? [], experiments ?? {})) { @@ -85,11 +85,11 @@ export function getToolDescriptionsForMode( // Add always available tools ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) - console.log("tools", tools) + // console.log("tools", tools) // Map tool descriptions for allowed tools const descriptions = Array.from(tools).map((toolName) => { const descriptionFn = toolDescriptionMap[toolName] - console.log("descriptionFn", descriptionFn) + // console.log("descriptionFn", descriptionFn) if (!descriptionFn) { return undefined } diff --git a/src/core/transformations/run_trans.ts b/src/core/transformations/run_trans.ts deleted file mode 100644 index 1bcc91b..0000000 --- a/src/core/transformations/run_trans.ts +++ /dev/null @@ -1,389 +0,0 @@ -import { Result, err, ok } from "neverthrow"; - -import { LLMModel } from '../../types/llm/model'; -import { RequestMessage } from '../../types/llm/request'; -import { InfioSettings } from '../../types/settings'; -import { tokenCount } from '../../utils/token'; -import LLMManager from '../llm/manager'; -import { ANALYZE_PAPER_DESCRIPTION, ANALYZE_PAPER_PROMPT } from '../prompts/transformations/analyze-paper'; -import { DENSE_SUMMARY_DESCRIPTION, DENSE_SUMMARY_PROMPT } from '../prompts/transformations/dense-summary'; -import { KEY_INSIGHTS_DESCRIPTION, KEY_INSIGHTS_PROMPT } from '../prompts/transformations/key-insights'; -import { REFLECTIONS_DESCRIPTION, REFLECTIONS_PROMPT } from '../prompts/transformations/reflections'; -import { SIMPLE_SUMMARY_DESCRIPTION, SIMPLE_SUMMARY_PROMPT } from '../prompts/transformations/simple-summary'; -import { TABLE_OF_CONTENTS_DESCRIPTION, TABLE_OF_CONTENTS_PROMPT } from '../prompts/transformations/table-of-contents'; - -// 转换类型枚举 -export enum TransformationType { - DENSE_SUMMARY = 'dense-summary', - ANALYZE_PAPER = 'analyze-paper', - SIMPLE_SUMMARY = 'simple-summary', - KEY_INSIGHTS = 'key-insights', - TABLE_OF_CONTENTS = 'table-of-contents', - REFLECTIONS = 'reflections' -} - -// 转换配置接口 -export interface TransformationConfig { - type: TransformationType; - prompt: string; - description: string; - maxTokens?: number; -} - -// 所有可用的转换配置 -export const TRANSFORMATIONS: Record = { - [TransformationType.DENSE_SUMMARY]: { - type: TransformationType.DENSE_SUMMARY, - prompt: DENSE_SUMMARY_PROMPT, - description: DENSE_SUMMARY_DESCRIPTION, - maxTokens: 4000 - }, - [TransformationType.ANALYZE_PAPER]: { - type: TransformationType.ANALYZE_PAPER, - prompt: ANALYZE_PAPER_PROMPT, - description: ANALYZE_PAPER_DESCRIPTION, - maxTokens: 3000 - }, - [TransformationType.SIMPLE_SUMMARY]: { - type: TransformationType.SIMPLE_SUMMARY, - prompt: SIMPLE_SUMMARY_PROMPT, - description: SIMPLE_SUMMARY_DESCRIPTION, - maxTokens: 2000 - }, - [TransformationType.KEY_INSIGHTS]: { - type: TransformationType.KEY_INSIGHTS, - prompt: KEY_INSIGHTS_PROMPT, - description: KEY_INSIGHTS_DESCRIPTION, - maxTokens: 3000 - }, - [TransformationType.TABLE_OF_CONTENTS]: { - type: TransformationType.TABLE_OF_CONTENTS, - prompt: TABLE_OF_CONTENTS_PROMPT, - description: TABLE_OF_CONTENTS_DESCRIPTION, - maxTokens: 2000 - }, - [TransformationType.REFLECTIONS]: { - type: TransformationType.REFLECTIONS, - prompt: REFLECTIONS_PROMPT, - description: REFLECTIONS_DESCRIPTION, - maxTokens: 2500 - } -}; - -// 转换参数接口 -export interface TransformationParams { - content: string; - transformationType: TransformationType; - settings: InfioSettings; - model?: LLMModel; - maxContentTokens?: number; -} - -// 转换结果接口 -export interface TransformationResult { - success: boolean; - result?: string; - error?: string; - truncated?: boolean; - originalTokens?: number; - processedTokens?: number; -} - -/** - * LLM 客户端类,用于与语言模型交互 - */ -class TransformationLLMClient { - private llm: LLMManager; - private model: LLMModel; - - constructor(llm: LLMManager, model: LLMModel) { - this.llm = llm; - this.model = model; - } - - async queryChatModel(messages: RequestMessage[]): Promise> { - try { - const stream = await this.llm.streamResponse( - this.model, - { - messages: messages, - model: this.model.modelId, - stream: true, - } - ); - - let response_content = ""; - for await (const chunk of stream) { - const content = chunk.choices[0]?.delta?.content ?? ''; - response_content += content; - } - return ok(response_content); - } catch (error) { - return err(error instanceof Error ? error : new Error(String(error))); - } - } -} - -/** - * 文档内容处理类 - */ -class DocumentProcessor { - private static readonly DEFAULT_MAX_TOKENS = 12000; // 默认最大 token 数 - private static readonly MIN_CONTENT_LENGTH = 100; // 最小内容长度(字符数) - - /** - * 检查和处理文档内容大小 - */ - static async processContent(content: string, maxTokens: number = this.DEFAULT_MAX_TOKENS): Promise<{ - processedContent: string; - truncated: boolean; - originalTokens: number; - processedTokens: number; - }> { - const originalTokens = await tokenCount(content); - - if (originalTokens <= maxTokens) { - return { - processedContent: content, - truncated: false, - originalTokens, - processedTokens: originalTokens - }; - } - - // 智能截断:基于 token 数量和内容边界 - // 先按字符比例粗略估算截断位置 - const estimatedCharRatio = content.length / originalTokens; - const estimatedCharLimit = Math.floor(maxTokens * estimatedCharRatio * 0.9); // 留一些缓冲 - - let truncatedContent = content.substring(0, estimatedCharLimit); - - // 查找最后一个完整句子的结束位置 - const lastSentenceEnd = Math.max( - truncatedContent.lastIndexOf('.'), - truncatedContent.lastIndexOf('!'), - truncatedContent.lastIndexOf('?'), - truncatedContent.lastIndexOf('。'), - truncatedContent.lastIndexOf('!'), - truncatedContent.lastIndexOf('?') - ); - - // 查找最后一个段落的结束位置 - const lastParagraphEnd = truncatedContent.lastIndexOf('\n\n'); - - // 选择最合适的截断位置 - const cutoffPosition = Math.max(lastSentenceEnd, lastParagraphEnd); - - if (cutoffPosition > estimatedCharLimit * 0.8) { // 如果截断位置不会丢失太多内容 - truncatedContent = content.substring(0, cutoffPosition + 1); - } - - // 确保截断后的内容不会太短 - if (truncatedContent.length < this.MIN_CONTENT_LENGTH) { - // 按字符比例回退到安全长度 - const safeCharLimit = Math.max(this.MIN_CONTENT_LENGTH, Math.floor(maxTokens * estimatedCharRatio * 0.8)); - truncatedContent = content.substring(0, Math.min(safeCharLimit, content.length)); - } - - // 验证最终的 token 数量 - const finalTokens = await tokenCount(truncatedContent); - - // 如果仍然超过限制,进行更精确的截断 - if (finalTokens > maxTokens) { - const adjustedRatio = truncatedContent.length / finalTokens; - const adjustedCharLimit = Math.floor(maxTokens * adjustedRatio); - truncatedContent = content.substring(0, adjustedCharLimit); - } - - const processedTokens = await tokenCount(truncatedContent); - - return { - processedContent: truncatedContent, - truncated: true, - originalTokens, - processedTokens - }; - } - - /** - * 验证内容是否适合处理 - */ - static validateContent(content: string): Result { - if (!content || content.trim().length === 0) { - return err(new Error('内容不能为空')); - } - - if (content.length < this.MIN_CONTENT_LENGTH) { - return err(new Error(`内容长度至少需要 ${this.MIN_CONTENT_LENGTH} 个字符`)); - } - - return ok(undefined); - } -} - -/** - * 主要的转换执行函数 - */ -export async function runTransformation(params: TransformationParams): Promise { - const { content, transformationType, settings, model, maxContentTokens } = params; - - try { - // 验证内容 - const contentValidation = DocumentProcessor.validateContent(content); - if (contentValidation.isErr()) { - return { - success: false, - error: contentValidation.error.message - }; - } - - // 获取转换配置 - const transformationConfig = TRANSFORMATIONS[transformationType]; - if (!transformationConfig) { - return { - success: false, - error: `不支持的转换类型: ${transformationType}` - }; - } - - // 处理文档内容(检查 token 数量并截断) - const tokenLimit = maxContentTokens || DocumentProcessor['DEFAULT_MAX_TOKENS']; - const processedDocument = await DocumentProcessor.processContent(content, tokenLimit); - - // 使用默认模型或传入的模型 - const llmModel: LLMModel = model || { - provider: settings.applyModelProvider, - modelId: settings.applyModelId, - }; - - // 创建 LLM 管理器和客户端 - const llmManager = new LLMManager(settings); - const client = new TransformationLLMClient(llmManager, llmModel); - - // 构建请求消息 - const messages: RequestMessage[] = [ - { - role: 'system', - content: transformationConfig.prompt - }, - { - role: 'user', - content: processedDocument.processedContent - } - ]; - - // 调用 LLM 执行转换 - const result = await client.queryChatModel(messages); - - if (result.isErr()) { - return { - success: false, - error: `LLM 调用失败: ${result.error.message}`, - truncated: processedDocument.truncated, - originalTokens: processedDocument.originalTokens, - processedTokens: processedDocument.processedTokens - }; - } - - // 后处理结果 - const processedResult = postProcessResult(result.value, transformationType); - - return { - success: true, - result: processedResult, - truncated: processedDocument.truncated, - originalTokens: processedDocument.originalTokens, - processedTokens: processedDocument.processedTokens - }; - - } catch (error) { - return { - success: false, - error: `转换过程中出现错误: ${error instanceof Error ? error.message : String(error)}` - }; - } -} - -/** - * 后处理转换结果 - */ -function postProcessResult(result: string, transformationType: TransformationType): string { - let processed = result.trim(); - - // 移除可能的 markdown 代码块标记 - processed = processed.replace(/^```[\w]*\n/, '').replace(/\n```$/, ''); - - // 根据转换类型进行特定的后处理 - switch (transformationType) { - case TransformationType.KEY_INSIGHTS: - // 确保 insights 格式正确 - if (!processed.includes('INSIGHTS')) { - processed = `# INSIGHTS\n\n${processed}`; - } - break; - - case TransformationType.REFLECTIONS: - // 确保 reflections 格式正确 - if (!processed.includes('REFLECTIONS')) { - processed = `# REFLECTIONS\n\n${processed}`; - } - break; - - case TransformationType.ANALYZE_PAPER: { - // 确保论文分析包含所有必需的部分 - const requiredSections = ['PURPOSE', 'CONTRIBUTION', 'KEY FINDINGS', 'IMPLICATIONS', 'LIMITATIONS']; - const hasAllSections = requiredSections.every(section => - processed.toUpperCase().includes(section) - ); - - if (!hasAllSections) { - // 如果缺少某些部分,添加提示 - processed += '\n\n*注意:某些分析部分可能不完整,建议重新处理或检查原始内容。*'; - } - break; - } - } - - return processed; -} - -/** - * 批量执行转换 - */ -export async function runBatchTransformations( - content: string, - transformationTypes: TransformationType[], - settings: InfioSettings, - model?: LLMModel -): Promise> { - const results: Record = {}; - - // 并行执行所有转换 - const promises = transformationTypes.map(async (type) => { - const result = await runTransformation({ - content, - transformationType: type, - settings, - model - }); - return { type, result }; - }); - - const completedResults = await Promise.all(promises); - - for (const { type, result } of completedResults) { - results[type] = result; - } - - return results; -} - -/** - * 获取所有可用的转换类型和描述 - */ -export function getAvailableTransformations(): Array<{ type: TransformationType, description: string }> { - return Object.values(TRANSFORMATIONS).map(config => ({ - type: config.type, - description: config.description - })); -} diff --git a/src/core/transformations/trans-engine.ts b/src/core/transformations/trans-engine.ts new file mode 100644 index 0000000..8fcb8c4 --- /dev/null +++ b/src/core/transformations/trans-engine.ts @@ -0,0 +1,683 @@ +import { Result, err, ok } from "neverthrow"; +import { App } from 'obsidian'; + +import { DBManager } from '../../database/database-manager'; +import { InsightManager } from '../../database/modules/insight/insight-manager'; +import { EmbeddingModel } from '../../types/embedding'; +import { LLMModel } from '../../types/llm/model'; +import { RequestMessage } from '../../types/llm/request'; +import { InfioSettings } from '../../types/settings'; +import { readTFileContentPdf } from '../../utils/obsidian'; +import { tokenCount } from '../../utils/token'; +import LLMManager from '../llm/manager'; +import { ANALYZE_PAPER_DESCRIPTION, ANALYZE_PAPER_PROMPT } from '../prompts/transformations/analyze-paper'; +import { DENSE_SUMMARY_DESCRIPTION, DENSE_SUMMARY_PROMPT } from '../prompts/transformations/dense-summary'; +import { KEY_INSIGHTS_DESCRIPTION, KEY_INSIGHTS_PROMPT } from '../prompts/transformations/key-insights'; +import { REFLECTIONS_DESCRIPTION, REFLECTIONS_PROMPT } from '../prompts/transformations/reflections'; +import { SIMPLE_SUMMARY_DESCRIPTION, SIMPLE_SUMMARY_PROMPT } from '../prompts/transformations/simple-summary'; +import { TABLE_OF_CONTENTS_DESCRIPTION, TABLE_OF_CONTENTS_PROMPT } from '../prompts/transformations/table-of-contents'; +import { getEmbeddingModel } from '../rag/embedding'; + +// 转换类型枚举 +export enum TransformationType { + DENSE_SUMMARY = 'dense_summary', + ANALYZE_PAPER = 'analyze_paper', + SIMPLE_SUMMARY = 'simple_summary', + KEY_INSIGHTS = 'key_insights', + TABLE_OF_CONTENTS = 'table_of_contents', + REFLECTIONS = 'reflections' +} + +// 转换配置接口 +export interface TransformationConfig { + type: TransformationType; + prompt: string; + description: string; + maxTokens?: number; +} + +// 所有可用的转换配置 +export const TRANSFORMATIONS: Record = { + [TransformationType.DENSE_SUMMARY]: { + type: TransformationType.DENSE_SUMMARY, + prompt: DENSE_SUMMARY_PROMPT, + description: DENSE_SUMMARY_DESCRIPTION, + maxTokens: 4000 + }, + [TransformationType.ANALYZE_PAPER]: { + type: TransformationType.ANALYZE_PAPER, + prompt: ANALYZE_PAPER_PROMPT, + description: ANALYZE_PAPER_DESCRIPTION, + maxTokens: 3000 + }, + [TransformationType.SIMPLE_SUMMARY]: { + type: TransformationType.SIMPLE_SUMMARY, + prompt: SIMPLE_SUMMARY_PROMPT, + description: SIMPLE_SUMMARY_DESCRIPTION, + maxTokens: 2000 + }, + [TransformationType.KEY_INSIGHTS]: { + type: TransformationType.KEY_INSIGHTS, + prompt: KEY_INSIGHTS_PROMPT, + description: KEY_INSIGHTS_DESCRIPTION, + maxTokens: 3000 + }, + [TransformationType.TABLE_OF_CONTENTS]: { + type: TransformationType.TABLE_OF_CONTENTS, + prompt: TABLE_OF_CONTENTS_PROMPT, + description: TABLE_OF_CONTENTS_DESCRIPTION, + maxTokens: 2000 + }, + [TransformationType.REFLECTIONS]: { + type: TransformationType.REFLECTIONS, + prompt: REFLECTIONS_PROMPT, + description: REFLECTIONS_DESCRIPTION, + maxTokens: 2500 + } +}; + +// 转换参数接口 +export interface TransformationParams { + filePath: string; // 必须的文件路径 + contentType?: 'document' | 'tag' | 'folder'; + transformationType: TransformationType; + model?: LLMModel; + maxContentTokens?: number; + saveToDatabase?: boolean; +} + +// 转换结果接口 +export interface TransformationResult { + success: boolean; + result?: string; + error?: string; + truncated?: boolean; + originalTokens?: number; + processedTokens?: number; +} + +/** + * LLM 客户端类,用于与语言模型交互 + */ +class TransformationLLMClient { + private llm: LLMManager; + private model: LLMModel; + + constructor(llm: LLMManager, model: LLMModel) { + this.llm = llm; + this.model = model; + } + + async queryChatModel(messages: RequestMessage[]): Promise> { + try { + const stream = await this.llm.streamResponse( + this.model, + { + messages: messages, + model: this.model.modelId, + stream: true, + } + ); + + let response_content = ""; + for await (const chunk of stream) { + const content = chunk.choices[0]?.delta?.content ?? ''; + response_content += content; + } + return ok(response_content); + } catch (error) { + return err(error instanceof Error ? error : new Error(String(error))); + } + } +} + +/** + * 文档内容处理类 + */ +class DocumentProcessor { + private static readonly DEFAULT_MAX_TOKENS = 12000; // 默认最大 token 数 + private static readonly MIN_CONTENT_LENGTH = 100; // 最小内容长度(字符数) + + /** + * 检查和处理文档内容大小 + */ + static async processContent(content: string, maxTokens: number = this.DEFAULT_MAX_TOKENS): Promise<{ + processedContent: string; + truncated: boolean; + originalTokens: number; + processedTokens: number; + }> { + const originalTokens = await tokenCount(content); + + if (originalTokens <= maxTokens) { + return { + processedContent: content, + truncated: false, + originalTokens, + processedTokens: originalTokens + }; + } + + // 智能截断:基于 token 数量和内容边界 + // 先按字符比例粗略估算截断位置 + const estimatedCharRatio = content.length / originalTokens; + const estimatedCharLimit = Math.floor(maxTokens * estimatedCharRatio * 0.9); // 留一些缓冲 + + let truncatedContent = content.substring(0, estimatedCharLimit); + + // 查找最后一个完整句子的结束位置 + const lastSentenceEnd = Math.max( + truncatedContent.lastIndexOf('.'), + truncatedContent.lastIndexOf('!'), + truncatedContent.lastIndexOf('?'), + truncatedContent.lastIndexOf('。'), + truncatedContent.lastIndexOf('!'), + truncatedContent.lastIndexOf('?') + ); + + // 查找最后一个段落的结束位置 + const lastParagraphEnd = truncatedContent.lastIndexOf('\n\n'); + + // 选择最合适的截断位置 + const cutoffPosition = Math.max(lastSentenceEnd, lastParagraphEnd); + + if (cutoffPosition > estimatedCharLimit * 0.8) { // 如果截断位置不会丢失太多内容 + truncatedContent = content.substring(0, cutoffPosition + 1); + } + + // 确保截断后的内容不会太短 + if (truncatedContent.length < this.MIN_CONTENT_LENGTH) { + // 按字符比例回退到安全长度 + const safeCharLimit = Math.max(this.MIN_CONTENT_LENGTH, Math.floor(maxTokens * estimatedCharRatio * 0.8)); + truncatedContent = content.substring(0, Math.min(safeCharLimit, content.length)); + } + + // 验证最终的 token 数量 + const finalTokens = await tokenCount(truncatedContent); + + // 如果仍然超过限制,进行更精确的截断 + if (finalTokens > maxTokens) { + const adjustedRatio = truncatedContent.length / finalTokens; + const adjustedCharLimit = Math.floor(maxTokens * adjustedRatio); + truncatedContent = content.substring(0, adjustedCharLimit); + } + + const processedTokens = await tokenCount(truncatedContent); + + return { + processedContent: truncatedContent, + truncated: true, + originalTokens, + processedTokens + }; + } + + /** + * 验证内容是否适合处理 + */ + static validateContent(content: string): Result { + if (!content || content.trim().length === 0) { + return err(new Error('内容不能为空')); + } + + if (content.length < this.MIN_CONTENT_LENGTH) { + return err(new Error(`内容长度至少需要 ${this.MIN_CONTENT_LENGTH} 个字符`)); + } + + return ok(undefined); + } +} + +/** + * 转换引擎类 + */ +export class TransEngine { + private app: App; + private settings: InfioSettings; + private llmManager: LLMManager; + private insightManager: InsightManager | null = null; + private embeddingModel: EmbeddingModel | null = null; + + constructor( + app: App, + settings: InfioSettings, + dbManager: DBManager, + ) { + this.app = app; + this.settings = settings; + this.llmManager = new LLMManager(settings); + this.insightManager = dbManager.getInsightManager(); + + // 初始化 embedding model + if (settings.embeddingModelId && settings.embeddingModelId.trim() !== '') { + try { + this.embeddingModel = getEmbeddingModel(settings); + } catch (error) { + console.warn('Failed to initialize embedding model:', error); + this.embeddingModel = null; + } + } else { + this.embeddingModel = null; + } + } + + cleanup() { + this.embeddingModel = null; + this.insightManager = null; + } + + setSettings(settings: InfioSettings) { + this.settings = settings; + this.llmManager = new LLMManager(settings); + + // 重新初始化 embedding model + if (settings.embeddingModelId && settings.embeddingModelId.trim() !== '') { + try { + this.embeddingModel = getEmbeddingModel(settings); + } catch (error) { + console.warn('Failed to initialize embedding model:', error); + this.embeddingModel = null; + } + } else { + this.embeddingModel = null; + } + } + + /** + * 获取文件元信息的方法 + */ + private async getFileMetadata(filePath: string): Promise< + | { + success: true; + fileExists: true; + sourcePath: string; + sourceMtime: number; + } + | { + success: false; + error: string; + } + > { + const targetFile = this.app.vault.getFileByPath(filePath); + if (!targetFile) { + return { + success: false, + error: `文件不存在: ${filePath}` + }; + } + + return { + success: true, + fileExists: true, + sourcePath: filePath, + sourceMtime: targetFile.stat.mtime + }; + } + + /** + * 检查数据库缓存的方法 + */ + private async checkDatabaseCache( + sourcePath: string, + sourceMtime: number, + transformationType: TransformationType + ): Promise< + | { + success: true; + foundCache: true; + result: TransformationResult; + } + | { + success: true; + foundCache: false; + } + > { + // 如果没有必要的参数,跳过缓存检查 + if (!this.embeddingModel || !this.insightManager) { + console.log("no embeddingModel or insightManager"); + return { + success: true, + foundCache: false + }; + } + + try { + const existingInsights = await this.insightManager.getInsightsBySourcePath(sourcePath, this.embeddingModel); + console.log("existingInsights", existingInsights); + + // 查找匹配的转换类型和修改时间的洞察 + const matchingInsight = existingInsights.find(insight => + insight.insight_type === transformationType && + insight.source_mtime === sourceMtime + ); + + if (matchingInsight) { + // 找到匹配的缓存结果,直接返回 + console.log(`使用缓存的转换结果: ${transformationType} for ${sourcePath}`); + return { + success: true, + foundCache: true, + result: { + success: true, + result: matchingInsight.insight, + truncated: false, // 缓存的结果不涉及截断 + originalTokens: 0, // 缓存结果不需要提供token信息 + processedTokens: 0 + } + }; + } + + return { + success: true, + foundCache: false + }; + } catch (cacheError) { + console.warn('查询缓存失败,继续执行转换:', cacheError); + // 缓存查询失败不影响主流程 + return { + success: true, + foundCache: false + }; + } + } + + /** + * 获取文件内容的方法 + */ + private async getFileContent(filePath: string): Promise< + | { + success: true; + fileContent: string; + } + | { + success: false; + error: string; + } + > { + const targetFile = this.app.vault.getFileByPath(filePath); + if (!targetFile) { + return { + success: false, + error: `文件不存在: ${filePath}` + }; + } + + try { + const fileContent = await readTFileContentPdf(targetFile, this.app.vault, this.app); + return { + success: true, + fileContent + }; + } catch (error) { + return { + success: false, + error: `读取文件失败: ${error instanceof Error ? error.message : String(error)}` + }; + } + } + + /** + * 保存转换结果到数据库的方法 + */ + private async saveResultToDatabase( + result: string, + transformationType: TransformationType, + sourcePath: string, + sourceMtime: number, + contentType: string + ): Promise { + if (!this.embeddingModel || !this.insightManager) { + return; + } + + try { + // 生成洞察内容的嵌入向量 + const insightEmbedding = await this.embeddingModel.getEmbedding(result); + + // 保存到数据库 + await this.insightManager.storeInsight( + { + insightType: transformationType, + insight: result, + sourceType: contentType, + sourcePath: sourcePath, + sourceMtime: sourceMtime, + embedding: insightEmbedding, + }, + this.embeddingModel + ); + + console.log(`转换结果已成功保存到数据库: ${transformationType} for ${sourcePath}`); + } catch (dbError) { + console.warn('保存洞察到数据库失败:', dbError); + // 后台任务失败不影响主要的转换结果 + } + } + + /** + * 主要的转换执行方法 + */ + async runTransformation(params: TransformationParams): Promise { + console.log("runTransformation", params); + const { + filePath, + contentType = 'document', + transformationType, + model, + maxContentTokens, + saveToDatabase = false + } = params; + + try { + // 第一步:获取文件元信息 + const metadataResult = await this.getFileMetadata(filePath); + + if (!metadataResult.success) { + return { + success: false, + error: metadataResult.error + }; + } + + // 此时TypeScript知道metadataResult.success为true + const { sourcePath, sourceMtime } = metadataResult; + + // 第二步:检查数据库缓存 + const cacheCheckResult = await this.checkDatabaseCache( + sourcePath, + sourceMtime, + transformationType + ); + + if (cacheCheckResult.foundCache) { + return cacheCheckResult.result; + } + + // 第三步:获取文件内容(只有在没有缓存时才执行) + const fileContentResult = await this.getFileContent(filePath); + + if (!fileContentResult.success) { + return { + success: false, + error: fileContentResult.error + }; + } + + // 此时TypeScript知道fileContentResult.success为true + const { fileContent } = fileContentResult; + + // 验证内容 + const contentValidation = DocumentProcessor.validateContent(fileContent); + if (contentValidation.isErr()) { + return { + success: false, + error: contentValidation.error.message + }; + } + + // 获取转换配置 + const transformationConfig = TRANSFORMATIONS[transformationType]; + if (!transformationConfig) { + return { + success: false, + error: `不支持的转换类型: ${transformationType}` + }; + } + + // 处理文档内容(检查 token 数量并截断) + const tokenLimit = maxContentTokens || DocumentProcessor['DEFAULT_MAX_TOKENS']; + const processedDocument = await DocumentProcessor.processContent(fileContent, tokenLimit); + + // 使用默认模型或传入的模型 + const llmModel: LLMModel = model || { + provider: this.settings.applyModelProvider, + modelId: this.settings.applyModelId, + }; + + // 创建 LLM 客户端 + const client = new TransformationLLMClient(this.llmManager, llmModel); + + // 构建请求消息 + const messages: RequestMessage[] = [ + { + role: 'system', + content: transformationConfig.prompt + }, + { + role: 'user', + content: processedDocument.processedContent + } + ]; + + // 调用 LLM 执行转换 + const result = await client.queryChatModel(messages); + + if (result.isErr()) { + return { + success: false, + error: `LLM 调用失败: ${result.error.message}`, + truncated: processedDocument.truncated, + originalTokens: processedDocument.originalTokens, + processedTokens: processedDocument.processedTokens + }; + } + + // 后处理结果 + const processedResult = this.postProcessResult(result.value, transformationType); + + // 保存转换结果到数据库(后台任务,不阻塞主流程) + if (saveToDatabase) { + // 创建后台任务,不使用 await + (async () => { + await this.saveResultToDatabase( + processedResult, + transformationType, + sourcePath, + sourceMtime, + contentType + ); + })(); // 立即执行异步函数,但不等待其完成 + } + + return { + success: true, + result: processedResult, + truncated: processedDocument.truncated, + originalTokens: processedDocument.originalTokens, + processedTokens: processedDocument.processedTokens + }; + + } catch (error) { + return { + success: false, + error: `转换过程中出现错误: ${error instanceof Error ? error.message : String(error)}` + }; + } + } + + /** + * 后处理转换结果 + */ + private postProcessResult(result: string, transformationType: TransformationType): string { + let processed = result.trim(); + + // 移除可能的 markdown 代码块标记 + processed = processed.replace(/^```[\w]*\n/, '').replace(/\n```$/, ''); + + // 根据转换类型进行特定的后处理 + switch (transformationType) { + case TransformationType.KEY_INSIGHTS: + // 确保 insights 格式正确 + if (!processed.includes('INSIGHTS')) { + processed = `# INSIGHTS\n\n${processed}`; + } + break; + + case TransformationType.REFLECTIONS: + // 确保 reflections 格式正确 + if (!processed.includes('REFLECTIONS')) { + processed = `# REFLECTIONS\n\n${processed}`; + } + break; + + case TransformationType.ANALYZE_PAPER: { + // 确保论文分析包含所有必需的部分 + const requiredSections = ['PURPOSE', 'CONTRIBUTION', 'KEY FINDINGS', 'IMPLICATIONS', 'LIMITATIONS']; + const hasAllSections = requiredSections.every(section => + processed.toUpperCase().includes(section) + ); + + if (!hasAllSections) { + // 如果缺少某些部分,添加提示 + processed += '\n\n*注意:某些分析部分可能不完整,建议重新处理或检查原始内容。*'; + } + break; + } + } + + return processed; + } + + /** + * 批量执行转换 + */ + async runBatchTransformations( + filePath: string, + transformationTypes: TransformationType[], + options?: { + model?: LLMModel; + saveToDatabase?: boolean; + } + ): Promise> { + const results: Record = {}; + + // 并行执行所有转换 + const promises = transformationTypes.map(async (type) => { + const result = await this.runTransformation({ + filePath: filePath, + transformationType: type, + model: options?.model, + saveToDatabase: options?.saveToDatabase + }); + return { type, result }; + }); + + const completedResults = await Promise.all(promises); + + for (const { type, result } of completedResults) { + results[type] = result; + } + + return results; + } + + /** + * 获取所有可用的转换类型和描述 + */ + static getAvailableTransformations(): Array<{ type: TransformationType, description: string }> { + return Object.values(TRANSFORMATIONS).map(config => ({ + type: config.type, + description: config.description + })); + } +} diff --git a/src/core/transformations/usage-example.ts b/src/core/transformations/usage-example.ts deleted file mode 100644 index db86ff2..0000000 --- a/src/core/transformations/usage-example.ts +++ /dev/null @@ -1,181 +0,0 @@ -import { InfioSettings } from '../../types/settings'; - -import { - TransformationType, - getAvailableTransformations, - runBatchTransformations, - runTransformation, -} from './run_trans'; - -/** - * 使用示例:单个转换 - */ -export async function exampleSingleTransformation(settings: InfioSettings) { - const sampleContent = ` - 人工智能技术正在快速发展,特别是大型语言模型的出现,彻底改变了我们与计算机交互的方式。 - 这些模型能够理解和生成人类语言,在多个领域展现出令人印象深刻的能力。 - - 然而,随着AI技术的普及,我们也面临着新的挑战,包括伦理问题、隐私保护、 - 以及如何确保AI技术的安全和可控发展。这些问题需要全社会的共同关注和努力。 - - 未来,人工智能将继续在教育、医疗、商业等领域发挥重要作用, - 但我们必须在推进技术发展的同时,确保技术服务于人类的福祉。 - `; - - try { - // 执行简单摘要转换 - const result = await runTransformation({ - content: sampleContent, - transformationType: TransformationType.SIMPLE_SUMMARY, - settings: settings - }); - - if (result.success) { - console.log('转换成功!'); - console.log('结果:', result.result); - - if (result.truncated) { - console.log(`注意:内容被截断 (${result.originalLength} -> ${result.processedLength} 字符)`); - } - } else { - console.error('转换失败:', result.error); - } - - return result; - } catch (error) { - console.error('执行转换时出错:', error); - throw error; - } -} - -/** - * 使用示例:批量转换 - */ -export async function exampleBatchTransformations(settings: InfioSettings) { - const sampleContent = ` - 区块链技术作为一种分布式账本技术,具有去中心化、不可篡改、透明公开等特点。 - 它最初是为比特币而设计的底层技术,但现在已经扩展到各个行业和应用场景。 - - 在金融领域,区块链可以用于跨境支付、供应链金融、数字货币等; - 在供应链管理中,它能够提供产品溯源和防伪验证; - 在数字身份认证方面,区块链可以建立更安全可靠的身份管理系统。 - - 尽管区块链技术有很多优势,但它也面临着可扩展性、能耗、监管等挑战。 - 随着技术的不断成熟和完善,相信这些问题会逐步得到解决。 - 区块链技术的未来发展值得期待,它将为数字经济的发展提供重要的技术支撑。 - `; - - try { - // 同时执行多种转换 - const transformationTypes = [ - TransformationType.SIMPLE_SUMMARY, - TransformationType.KEY_INSIGHTS, - TransformationType.TABLE_OF_CONTENTS - ]; - - const results = await runBatchTransformations( - sampleContent, - transformationTypes, - settings - ); - - console.log('批量转换完成!'); - - for (const [type, result] of Object.entries(results)) { - console.log(`\n=== ${type.toUpperCase()} ===`); - if (result.success) { - console.log(result.result); - } else { - console.error('失败:', result.error); - } - } - - return results; - } catch (error) { - console.error('执行批量转换时出错:', error); - throw error; - } -} - -/** - * 使用示例:处理长文档(会被截断) - */ -export async function exampleLongDocumentProcessing(settings: InfioSettings) { - // 模拟一个很长的文档 - const longContent = '这是一个很长的文档内容。'.repeat(10000); // 约50万字符 - - try { - const result = await runTransformation({ - content: longContent, - transformationType: TransformationType.DENSE_SUMMARY, - settings: settings, - maxContentLength: 30000 // 设置最大内容长度 - }); - - if (result.success) { - console.log('长文档转换成功!'); - console.log('原始长度:', result.originalLength); - console.log('处理后长度:', result.processedLength); - console.log('是否被截断:', result.truncated); - console.log('结果长度:', result.result?.length); - } else { - console.error('转换失败:', result.error); - } - - return result; - } catch (error) { - console.error('处理长文档时出错:', error); - throw error; - } -} - -/** - * 使用示例:获取所有可用的转换类型 - */ -export function exampleGetAvailableTransformations() { - const availableTransformations = getAvailableTransformations(); - - console.log('可用的转换类型:'); - availableTransformations.forEach((transformation, index) => { - console.log(`${index + 1}. ${transformation.type}: ${transformation.description}`); - }); - - return availableTransformations; -} - -/** - * 使用示例:错误处理 - */ -export async function exampleErrorHandling(settings: InfioSettings) { - try { - // 测试空内容 - const emptyResult = await runTransformation({ - content: '', - transformationType: TransformationType.SIMPLE_SUMMARY, - settings: settings - }); - - console.log('空内容测试:', emptyResult); - - // 测试太短的内容 - const shortResult = await runTransformation({ - content: '太短', - transformationType: TransformationType.SIMPLE_SUMMARY, - settings: settings - }); - - console.log('短内容测试:', shortResult); - - // 测试无效的转换类型(需要类型断言来测试) - const invalidResult = await runTransformation({ - content: '这是一些测试内容,用于测试无效的转换类型处理。', - transformationType: 'invalid-type' as TransformationType, - settings: settings - }); - - console.log('无效类型测试:', invalidResult); - - } catch (error) { - console.error('错误处理测试时出错:', error); - } -} diff --git a/src/database/modules/insight/insight-manager.ts b/src/database/modules/insight/insight-manager.ts index 18d1f2c..c938563 100644 --- a/src/database/modules/insight/insight-manager.ts +++ b/src/database/modules/insight/insight-manager.ts @@ -1,8 +1,8 @@ import { App, TFile } from 'obsidian' -import { InsertSourceInsight, SelectSourceInsight } from '../../schema' import { EmbeddingModel } from '../../../types/embedding' import { DBManager } from '../../database-manager' +import { InsertSourceInsight, SelectSourceInsight } from '../../schema' import { InsightRepository } from './insight-repository' @@ -51,6 +51,7 @@ export class InsightManager { insight: string sourceType: 'document' | 'tag' | 'folder' sourcePath: string + sourceMtime: number embedding: number[] }, embeddingModel: EmbeddingModel, @@ -60,6 +61,7 @@ export class InsightManager { insight: insightData.insight, source_type: insightData.sourceType, source_path: insightData.sourcePath, + source_mtime: insightData.sourceMtime, embedding: insightData.embedding, } @@ -75,6 +77,7 @@ export class InsightManager { insight: string sourceType: 'document' | 'tag' | 'folder' sourcePath: string + sourceMtime: number embedding: number[] }>, embeddingModel: EmbeddingModel, @@ -84,6 +87,7 @@ export class InsightManager { insight: data.insight, source_type: data.sourceType, source_path: data.sourcePath, + source_mtime: data.sourceMtime, embedding: data.embedding, })) @@ -100,6 +104,7 @@ export class InsightManager { insight?: string sourceType?: 'document' | 'tag' | 'folder' sourcePath?: string + sourceMtime?: number embedding?: number[] }, embeddingModel: EmbeddingModel, @@ -118,6 +123,9 @@ export class InsightManager { if (updates.sourcePath !== undefined) { updateData.source_path = updates.sourcePath } + if (updates.sourceMtime !== undefined) { + updateData.source_mtime = updates.sourceMtime + } if (updates.embedding !== undefined) { updateData.embedding = updates.embedding } @@ -318,4 +326,26 @@ export class InsightManager { return filteredInsights } + + // /** + // * 根据源文件修改时间范围获取洞察 + // */ + // async getInsightsByMtimeRange( + // minMtime: number, + // maxMtime: number, + // embeddingModel: EmbeddingModel, + // ): Promise { + // return await this.repository.getInsightsByMtimeRange(minMtime, maxMtime, embeddingModel) + // } + + // /** + // * 根据源文件修改时间获取需要更新的洞察 + // */ + // async getOutdatedInsights( + // sourcePath: string, + // currentMtime: number, + // embeddingModel: EmbeddingModel, + // ): Promise { + // return await this.repository.getOutdatedInsights(sourcePath, currentMtime, embeddingModel) + // } } diff --git a/src/database/modules/insight/insight-repository.ts b/src/database/modules/insight/insight-repository.ts index d3bb37e..5045cfa 100644 --- a/src/database/modules/insight/insight-repository.ts +++ b/src/database/modules/insight/insight-repository.ts @@ -139,8 +139,8 @@ export class InsightRepository { // 构建批量插入的 SQL const values = data.map((insight, index) => { - const offset = index * 6 - return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5}, $${offset + 6})` + const offset = index * 7 + return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5}, $${offset + 6}, $${offset + 7})` }).join(',') const params = data.flatMap(insight => [ @@ -148,12 +148,13 @@ export class InsightRepository { insight.insight.replace(/\0/g, ''), // 清理null字节 insight.source_type, insight.source_path, + insight.source_mtime, `[${insight.embedding.join(',')}]`, // 转换为PostgreSQL vector格式 new Date() // updated_at ]) await this.db.query( - `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, embedding, updated_at) + `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, source_mtime, embedding, updated_at) VALUES ${values}`, params ) @@ -197,6 +198,12 @@ export class InsightRepository { paramIndex++ } + if (data.source_mtime !== undefined) { + fields.push(`source_mtime = $${paramIndex}`) + params.push(data.source_mtime) + paramIndex++ + } + if (data.embedding !== undefined) { fields.push(`embedding = $${paramIndex}`) params.push(`[${data.embedding.join(',')}]`) @@ -235,7 +242,7 @@ export class InsightRepository { } const tableName = this.getTableName(embeddingModel) - let whereConditions = ['1 - (embedding <=> $1::vector) > $2'] + const whereConditions: string[] = ['1 - (embedding <=> $1::vector) > $2'] const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit] let paramIndex = 4 @@ -259,7 +266,7 @@ export class InsightRepository { const query = ` SELECT - id, insight_type, insight, source_type, source_path, created_at, updated_at, + id, insight_type, insight, source_type, source_path, source_mtime, created_at, updated_at, 1 - (embedding <=> $1::vector) as similarity FROM "${tableName}" WHERE ${whereConditions.join(' AND ')} @@ -271,4 +278,36 @@ export class InsightRepository { const result = await this.db.query(query, params) return result.rows } + + // async getInsightsByMtimeRange( + // minMtime: number, + // maxMtime: number, + // embeddingModel: EmbeddingModel, + // ): Promise { + // if (!this.db) { + // throw new DatabaseNotInitializedException() + // } + // const tableName = this.getTableName(embeddingModel) + // const result = await this.db.query( + // `SELECT * FROM "${tableName}" WHERE source_mtime >= $1 AND source_mtime <= $2 ORDER BY created_at DESC`, + // [minMtime, maxMtime] + // ) + // return result.rows + // } + + // async getOutdatedInsights( + // sourcePath: string, + // currentMtime: number, + // embeddingModel: EmbeddingModel, + // ): Promise { + // if (!this.db) { + // throw new DatabaseNotInitializedException() + // } + // const tableName = this.getTableName(embeddingModel) + // const result = await this.db.query( + // `SELECT * FROM "${tableName}" WHERE source_path = $1 AND source_mtime < $2 ORDER BY created_at DESC`, + // [sourcePath, currentMtime] + // ) + // return result.rows + // } } diff --git a/src/database/schema.ts b/src/database/schema.ts index 6d98ba0..e3140f0 100644 --- a/src/database/schema.ts +++ b/src/database/schema.ts @@ -1,7 +1,6 @@ import { SerializedLexicalNode } from 'lexical' import { SUPPORT_EMBEDDING_SIMENTION } from '../constants' -import { ApplyStatus } from '../types/apply' // import { EmbeddingModelId } from '../types/embedding' // PostgreSQL column types @@ -184,6 +183,7 @@ export type SourceInsightRecord = { insight: string source_type: 'document' | 'tag' | 'folder' source_path: string + source_mtime: number embedding: number[] created_at: Date updated_at: Date @@ -203,6 +203,7 @@ const createSourceInsightTable = (dimension: number): TableDefinition => { insight: { type: 'TEXT', notNull: true }, source_type: { type: 'TEXT', notNull: true }, source_path: { type: 'TEXT', notNull: true }, + source_mtime: { type: 'BIGINT', notNull: true }, embedding: { type: 'VECTOR', dimensions: dimension }, created_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true }, updated_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true } diff --git a/src/database/sql.ts b/src/database/sql.ts index c263baa..b088463 100644 --- a/src/database/sql.ts +++ b/src/database/sql.ts @@ -104,6 +104,7 @@ export const migrations: Record = { "insight" text NOT NULL, "source_type" text NOT NULL, "source_path" text NOT NULL, + "source_mtime" bigint NOT NULL, "embedding" vector(1536), "created_at" timestamp DEFAULT now() NOT NULL, "updated_at" timestamp DEFAULT now() NOT NULL @@ -115,6 +116,7 @@ export const migrations: Record = { "insight" text NOT NULL, "source_type" text NOT NULL, "source_path" text NOT NULL, + "source_mtime" bigint NOT NULL, "embedding" vector(1024), "created_at" timestamp DEFAULT now() NOT NULL, "updated_at" timestamp DEFAULT now() NOT NULL @@ -126,6 +128,7 @@ export const migrations: Record = { "insight" text NOT NULL, "source_type" text NOT NULL, "source_path" text NOT NULL, + "source_mtime" bigint NOT NULL, "embedding" vector(768), "created_at" timestamp DEFAULT now() NOT NULL, "updated_at" timestamp DEFAULT now() NOT NULL @@ -137,6 +140,7 @@ export const migrations: Record = { "insight" text NOT NULL, "source_type" text NOT NULL, "source_path" text NOT NULL, + "source_mtime" bigint NOT NULL, "embedding" vector(512), "created_at" timestamp DEFAULT now() NOT NULL, "updated_at" timestamp DEFAULT now() NOT NULL @@ -148,6 +152,7 @@ export const migrations: Record = { "insight" text NOT NULL, "source_type" text NOT NULL, "source_path" text NOT NULL, + "source_mtime" bigint NOT NULL, "embedding" vector(384), "created_at" timestamp DEFAULT now() NOT NULL, "updated_at" timestamp DEFAULT now() NOT NULL @@ -245,5 +250,16 @@ export const migrations: Record = { "created_at" timestamp DEFAULT now() NOT NULL ); ` + }, + add_source_mtime: { + description: "Adds missing source_mtime column to existing source insight tables", + sql: ` + -- Add source_mtime column to existing source insight tables if it doesn't exist + ALTER TABLE "source_insight_1536" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0; + ALTER TABLE "source_insight_1024" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0; + ALTER TABLE "source_insight_768" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0; + ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0; + ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0; + ` } }; diff --git a/src/main.ts b/src/main.ts index 3cd312f..ac3e980 100644 --- a/src/main.ts +++ b/src/main.ts @@ -11,6 +11,7 @@ import { getDiffStrategy } from "./core/diff/DiffStrategy" import { InlineEdit } from './core/edit/inline-edit-processor' import { McpHub } from './core/mcp/McpHub' import { RAGEngine } from './core/rag/rag-engine' +import { TransEngine } from './core/transformations/trans-engine' import { DBManager } from './database/database-manager' import { migrateToJsonDatabase } from './database/json/migrateToJsonDatabase' import EventListener from "./event-listener" @@ -41,6 +42,7 @@ export default class InfioPlugin extends Plugin { private activeLeafChangeUnloadFn: (() => void) | null = null private dbManagerInitPromise: Promise | null = null private ragEngineInitPromise: Promise | null = null + private transEngineInitPromise: Promise | null = null private mcpHubInitPromise: Promise | null = null settings: InfioSettings settingTab: InfioSettingTab @@ -49,6 +51,7 @@ export default class InfioPlugin extends Plugin { dbManager: DBManager | null = null mcpHub: McpHub | null = null ragEngine: RAGEngine | null = null + transEngine: TransEngine | null = null inlineEdit: InlineEdit | null = null diffStrategy?: DiffStrategy dataviewManager: DataviewManager | null = null @@ -422,10 +425,14 @@ export default class InfioPlugin extends Plugin { // Promise cleanup this.dbManagerInitPromise = null this.ragEngineInitPromise = null + this.transEngineInitPromise = null this.mcpHubInitPromise = null // RagEngine cleanup this.ragEngine?.cleanup() this.ragEngine = null + // TransEngine cleanup + this.transEngine?.cleanup() + this.transEngine = null // Database cleanup this.dbManager?.cleanup() this.dbManager = null @@ -445,6 +452,7 @@ export default class InfioPlugin extends Plugin { this.settings = newSettings await this.saveData(newSettings) this.ragEngine?.setSettings(newSettings) + this.transEngine?.setSettings(newSettings) this.settingsListeners.forEach((listener) => listener(newSettings)) } @@ -572,6 +580,23 @@ export default class InfioPlugin extends Plugin { return this.ragEngineInitPromise } + async getTransEngine(): Promise { + if (this.transEngine) { + return this.transEngine + } + + if (!this.transEngineInitPromise) { + this.transEngineInitPromise = (async () => { + const dbManager = await this.getDbManager() + this.transEngine = new TransEngine(this.app, this.settings, dbManager) + return this.transEngine + })() + } + + // if initialization is running, wait for it to complete instead of creating a new initialization promise + return this.transEngineInitPromise + } + private async migrateToJsonStorage() { try { const dbManager = await this.getDbManager() diff --git a/src/utils/parse-infio-block.ts b/src/utils/parse-infio-block.ts index 79df520..5901b0e 100644 --- a/src/utils/parse-infio-block.ts +++ b/src/utils/parse-infio-block.ts @@ -736,7 +736,7 @@ export function parseMsgBlocks( if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) { // @ts-expect-error - parse5 node value type path = childNode.childNodes[0].value - } else if (childNode.nodeName === 'type' && childNode.childNodes.length > 0) { + } else if (childNode.nodeName === 'transformation' && childNode.childNodes.length > 0) { // @ts-expect-error - parse5 node value type transformation = childNode.childNodes[0].value }