diff --git a/src/ChatView.tsx b/src/ChatView.tsx
index 44c5dc2..a7978dc 100644
--- a/src/ChatView.tsx
+++ b/src/ChatView.tsx
@@ -15,6 +15,7 @@ import { LLMProvider } from './contexts/LLMContext'
import { McpHubProvider } from './contexts/McpHubContext'
import { RAGProvider } from './contexts/RAGContext'
import { SettingsProvider } from './contexts/SettingsContext'
+import { TransProvider } from './contexts/TransContext'
import InfioPlugin from './main'
import { MentionableBlockData } from './types/mentionable'
import { InfioSettings } from './types/settings'
@@ -96,7 +97,8 @@ export class ChatView extends ItemView {
>
this.plugin.getRAGEngine()}>
-
+ this.plugin.getTransEngine()}>
+
this.plugin.getMcpHub()}>
@@ -109,6 +111,7 @@ export class ChatView extends ItemView {
+
diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx
index da3ffb7..9aa3e1f 100644
--- a/src/components/chat-view/ChatView.tsx
+++ b/src/components/chat-view/ChatView.tsx
@@ -24,6 +24,7 @@ import { useLLM } from '../../contexts/LLMContext'
import { useMcpHub } from '../../contexts/McpHubContext'
import { useRAG } from '../../contexts/RAGContext'
import { useSettings } from '../../contexts/SettingsContext'
+import { useTrans } from '../../contexts/TransContext'
import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match'
import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match'
import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex'
@@ -34,7 +35,7 @@ import {
LLMBaseUrlNotSetException,
LLMModelNotSetException,
} from '../../core/llm/exception'
-import { TransformationType, runTransformation } from '../../core/transformations/run_trans'
+import { TransformationType } from '../../core/transformations/trans-engine'
import { useChatHistory } from '../../hooks/use-chat-history'
import { useCustomModes } from '../../hooks/use-custom-mode'
import { t } from '../../lang/helpers'
@@ -118,6 +119,7 @@ const Chat = forwardRef((props, ref) => {
const app = useApp()
const { settings, setSettings } = useSettings()
const { getRAGEngine } = useRAG()
+ const { getTransEngine } = useTrans()
const diffStrategy = useDiffStrategy()
const dataviewManager = useDataview()
const { getMcpHub } = useMcpHub()
@@ -832,30 +834,24 @@ const Chat = forwardRef((props, ref) => {
} else if (toolArgs.type === 'call_transformations') {
// Handling for the unified transformations tool
try {
- const targetFile = app.vault.getFileByPath(toolArgs.path);
- if (!targetFile) {
- throw new Error(`File not found: ${toolArgs.path}`);
- }
-
- const fileContent = await readTFileContentPdf(targetFile, app.vault, app);
-
- // The transformation type is now passed directly in the arguments
- const transformationType = toolArgs.transformation as TransformationType;
-
+ console.log("call_transformations", toolArgs)
// Validate that the transformation type is a valid enum member
- if (!Object.values(TransformationType).includes(transformationType)) {
- throw new Error(`Unsupported transformation type: ${transformationType}`);
+ if (!Object.values(TransformationType).includes(toolArgs.transformation as TransformationType)) {
+ throw new Error(`Unsupported transformation type: ${toolArgs.transformation}`);
}
- // Execute the transformation
- const transformationResult = await runTransformation({
- content: fileContent,
- transformationType,
- settings,
+ const transformationType = toolArgs.transformation as TransformationType;
+ const transEngine = await getTransEngine();
+
+ // Execute the transformation using the TransEngine
+ const transformationResult = await transEngine.runTransformation({
+ filePath: toolArgs.path,
+ transformationType: transformationType,
model: {
provider: settings.applyModelProvider,
modelId: settings.applyModelId,
- }
+ },
+ saveToDatabase: true
});
if (!transformationResult.success) {
@@ -863,7 +859,7 @@ const Chat = forwardRef((props, ref) => {
}
// Build the result message
- let formattedContent = `[${transformationType}] transformation complete:\n\n${transformationResult.result}`;
+ let formattedContent = `[${toolArgs.transformation}] transformation complete:\n\n${transformationResult.result}`;
if (transformationResult.truncated) {
formattedContent += `\n\n*Note: The original content was too long (${transformationResult.originalTokens} tokens) and was truncated to ${transformationResult.processedTokens} tokens for processing.*`;
diff --git a/src/contexts/TransContext.tsx b/src/contexts/TransContext.tsx
new file mode 100644
index 0000000..ff85d38
--- /dev/null
+++ b/src/contexts/TransContext.tsx
@@ -0,0 +1,39 @@
+import {
+ PropsWithChildren,
+ createContext,
+ useContext,
+ useEffect,
+ useMemo,
+} from 'react'
+
+import { TransEngine } from '../core/transformations/trans-engine'
+
+export type TransContextType = { // value carried by TransContext
+  getTransEngine: () => Promise<TransEngine>
+}
+
+const TransContext = createContext<TransContextType | null>(null) // null means "no TransProvider above"; useTrans turns that into an error
+
+export function TransProvider({ // provides the getTransEngine accessor to descendant components
+  getTransEngine,
+  children,
+}: PropsWithChildren<{ getTransEngine: () => Promise<TransEngine> }>) {
+  useEffect(() => {
+    // kick off TransEngine initialization in the background; the promise is intentionally not awaited
+    void getTransEngine()
+  }, [getTransEngine])
+
+  const value = useMemo(() => { // memoized so the context value only changes when getTransEngine changes
+    return { getTransEngine }
+  }, [getTransEngine])
+
+  return <TransContext.Provider value={value}>{children}</TransContext.Provider>
+}
+
+export function useTrans() { // hook: returns the TransContext value
+ const context = useContext(TransContext)
+ if (!context) { // no context value available: fail loudly instead of returning null
+ throw new Error('useTrans must be used within a TransProvider')
+ }
+ return context
+}
diff --git a/src/core/prompts/tools/call-insights.ts b/src/core/prompts/tools/call-insights.ts
index 1c6138d..1aa9b82 100644
--- a/src/core/prompts/tools/call-insights.ts
+++ b/src/core/prompts/tools/call-insights.ts
@@ -2,7 +2,7 @@ import { ToolArgs } from "./types"
export function getCallInsightsDescription(args: ToolArgs): string {
return `## insights
-Description: Use for **Information Processing**. After reading a note's content, use this tool to process and distill the information in various ways. You must choose the most appropriate transformation type based on your goal.
+Description: Use for **Knowledge Synthesis and Retrieval**. This is your primary tool for "asking questions" to a document or a set of documents. Use it to query your notes and extract higher-level insights, summaries, and other conceptual abstractions. Instead of just finding raw text, this tool helps you understand and synthesize the information within your vault.
Parameters:
- path: (required) The path to the file or folder to be processed (relative to the current working directory: ${args.cwd}).
- transformation: (required) The type of transformation to apply. Must be one of the following:
@@ -15,12 +15,12 @@ Parameters:
Usage:
path/to/your/file.md
-simple_summary
+simple_summary
Example: Getting the key insights from a project note
Projects/Project_Alpha_Retrospective.md
-key_insights
+key_insights
`
}
diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts
index 5443067..6ee4613 100644
--- a/src/core/prompts/tools/index.ts
+++ b/src/core/prompts/tools/index.ts
@@ -54,9 +54,9 @@ export function getToolDescriptionsForMode(
customModes?: ModeConfig[],
experiments?: Record,
): string {
- console.log("getToolDescriptionsForMode", mode, customModes)
+ // console.log("getToolDescriptionsForMode", mode, customModes)
const config = getModeConfig(mode, customModes)
- console.log("config", config)
+ // console.log("config", config)
const args: ToolArgs = {
cwd,
searchSettings,
@@ -73,7 +73,7 @@ export function getToolDescriptionsForMode(
config.groups.forEach((groupEntry) => {
const groupName = getGroupName(groupEntry)
const toolGroup = TOOL_GROUPS[groupName]
- console.log("toolGroup", toolGroup)
+ // console.log("toolGroup", toolGroup)
if (toolGroup) {
toolGroup.tools.forEach((tool) => {
if (isToolAllowedForMode(tool, mode, customModes ?? [], experiments ?? {})) {
@@ -85,11 +85,11 @@ export function getToolDescriptionsForMode(
// Add always available tools
ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool))
- console.log("tools", tools)
+ // console.log("tools", tools)
// Map tool descriptions for allowed tools
const descriptions = Array.from(tools).map((toolName) => {
const descriptionFn = toolDescriptionMap[toolName]
- console.log("descriptionFn", descriptionFn)
+ // console.log("descriptionFn", descriptionFn)
if (!descriptionFn) {
return undefined
}
diff --git a/src/core/transformations/run_trans.ts b/src/core/transformations/run_trans.ts
deleted file mode 100644
index 1bcc91b..0000000
--- a/src/core/transformations/run_trans.ts
+++ /dev/null
@@ -1,389 +0,0 @@
-import { Result, err, ok } from "neverthrow";
-
-import { LLMModel } from '../../types/llm/model';
-import { RequestMessage } from '../../types/llm/request';
-import { InfioSettings } from '../../types/settings';
-import { tokenCount } from '../../utils/token';
-import LLMManager from '../llm/manager';
-import { ANALYZE_PAPER_DESCRIPTION, ANALYZE_PAPER_PROMPT } from '../prompts/transformations/analyze-paper';
-import { DENSE_SUMMARY_DESCRIPTION, DENSE_SUMMARY_PROMPT } from '../prompts/transformations/dense-summary';
-import { KEY_INSIGHTS_DESCRIPTION, KEY_INSIGHTS_PROMPT } from '../prompts/transformations/key-insights';
-import { REFLECTIONS_DESCRIPTION, REFLECTIONS_PROMPT } from '../prompts/transformations/reflections';
-import { SIMPLE_SUMMARY_DESCRIPTION, SIMPLE_SUMMARY_PROMPT } from '../prompts/transformations/simple-summary';
-import { TABLE_OF_CONTENTS_DESCRIPTION, TABLE_OF_CONTENTS_PROMPT } from '../prompts/transformations/table-of-contents';
-
-// 转换类型枚举
-export enum TransformationType {
- DENSE_SUMMARY = 'dense-summary',
- ANALYZE_PAPER = 'analyze-paper',
- SIMPLE_SUMMARY = 'simple-summary',
- KEY_INSIGHTS = 'key-insights',
- TABLE_OF_CONTENTS = 'table-of-contents',
- REFLECTIONS = 'reflections'
-}
-
-// 转换配置接口
-export interface TransformationConfig {
- type: TransformationType;
- prompt: string;
- description: string;
- maxTokens?: number;
-}
-
-// 所有可用的转换配置
-export const TRANSFORMATIONS: Record = {
- [TransformationType.DENSE_SUMMARY]: {
- type: TransformationType.DENSE_SUMMARY,
- prompt: DENSE_SUMMARY_PROMPT,
- description: DENSE_SUMMARY_DESCRIPTION,
- maxTokens: 4000
- },
- [TransformationType.ANALYZE_PAPER]: {
- type: TransformationType.ANALYZE_PAPER,
- prompt: ANALYZE_PAPER_PROMPT,
- description: ANALYZE_PAPER_DESCRIPTION,
- maxTokens: 3000
- },
- [TransformationType.SIMPLE_SUMMARY]: {
- type: TransformationType.SIMPLE_SUMMARY,
- prompt: SIMPLE_SUMMARY_PROMPT,
- description: SIMPLE_SUMMARY_DESCRIPTION,
- maxTokens: 2000
- },
- [TransformationType.KEY_INSIGHTS]: {
- type: TransformationType.KEY_INSIGHTS,
- prompt: KEY_INSIGHTS_PROMPT,
- description: KEY_INSIGHTS_DESCRIPTION,
- maxTokens: 3000
- },
- [TransformationType.TABLE_OF_CONTENTS]: {
- type: TransformationType.TABLE_OF_CONTENTS,
- prompt: TABLE_OF_CONTENTS_PROMPT,
- description: TABLE_OF_CONTENTS_DESCRIPTION,
- maxTokens: 2000
- },
- [TransformationType.REFLECTIONS]: {
- type: TransformationType.REFLECTIONS,
- prompt: REFLECTIONS_PROMPT,
- description: REFLECTIONS_DESCRIPTION,
- maxTokens: 2500
- }
-};
-
-// 转换参数接口
-export interface TransformationParams {
- content: string;
- transformationType: TransformationType;
- settings: InfioSettings;
- model?: LLMModel;
- maxContentTokens?: number;
-}
-
-// 转换结果接口
-export interface TransformationResult {
- success: boolean;
- result?: string;
- error?: string;
- truncated?: boolean;
- originalTokens?: number;
- processedTokens?: number;
-}
-
-/**
- * LLM 客户端类,用于与语言模型交互
- */
-class TransformationLLMClient {
- private llm: LLMManager;
- private model: LLMModel;
-
- constructor(llm: LLMManager, model: LLMModel) {
- this.llm = llm;
- this.model = model;
- }
-
- async queryChatModel(messages: RequestMessage[]): Promise> {
- try {
- const stream = await this.llm.streamResponse(
- this.model,
- {
- messages: messages,
- model: this.model.modelId,
- stream: true,
- }
- );
-
- let response_content = "";
- for await (const chunk of stream) {
- const content = chunk.choices[0]?.delta?.content ?? '';
- response_content += content;
- }
- return ok(response_content);
- } catch (error) {
- return err(error instanceof Error ? error : new Error(String(error)));
- }
- }
-}
-
-/**
- * 文档内容处理类
- */
-class DocumentProcessor {
- private static readonly DEFAULT_MAX_TOKENS = 12000; // 默认最大 token 数
- private static readonly MIN_CONTENT_LENGTH = 100; // 最小内容长度(字符数)
-
- /**
- * 检查和处理文档内容大小
- */
- static async processContent(content: string, maxTokens: number = this.DEFAULT_MAX_TOKENS): Promise<{
- processedContent: string;
- truncated: boolean;
- originalTokens: number;
- processedTokens: number;
- }> {
- const originalTokens = await tokenCount(content);
-
- if (originalTokens <= maxTokens) {
- return {
- processedContent: content,
- truncated: false,
- originalTokens,
- processedTokens: originalTokens
- };
- }
-
- // 智能截断:基于 token 数量和内容边界
- // 先按字符比例粗略估算截断位置
- const estimatedCharRatio = content.length / originalTokens;
- const estimatedCharLimit = Math.floor(maxTokens * estimatedCharRatio * 0.9); // 留一些缓冲
-
- let truncatedContent = content.substring(0, estimatedCharLimit);
-
- // 查找最后一个完整句子的结束位置
- const lastSentenceEnd = Math.max(
- truncatedContent.lastIndexOf('.'),
- truncatedContent.lastIndexOf('!'),
- truncatedContent.lastIndexOf('?'),
- truncatedContent.lastIndexOf('。'),
- truncatedContent.lastIndexOf('!'),
- truncatedContent.lastIndexOf('?')
- );
-
- // 查找最后一个段落的结束位置
- const lastParagraphEnd = truncatedContent.lastIndexOf('\n\n');
-
- // 选择最合适的截断位置
- const cutoffPosition = Math.max(lastSentenceEnd, lastParagraphEnd);
-
- if (cutoffPosition > estimatedCharLimit * 0.8) { // 如果截断位置不会丢失太多内容
- truncatedContent = content.substring(0, cutoffPosition + 1);
- }
-
- // 确保截断后的内容不会太短
- if (truncatedContent.length < this.MIN_CONTENT_LENGTH) {
- // 按字符比例回退到安全长度
- const safeCharLimit = Math.max(this.MIN_CONTENT_LENGTH, Math.floor(maxTokens * estimatedCharRatio * 0.8));
- truncatedContent = content.substring(0, Math.min(safeCharLimit, content.length));
- }
-
- // 验证最终的 token 数量
- const finalTokens = await tokenCount(truncatedContent);
-
- // 如果仍然超过限制,进行更精确的截断
- if (finalTokens > maxTokens) {
- const adjustedRatio = truncatedContent.length / finalTokens;
- const adjustedCharLimit = Math.floor(maxTokens * adjustedRatio);
- truncatedContent = content.substring(0, adjustedCharLimit);
- }
-
- const processedTokens = await tokenCount(truncatedContent);
-
- return {
- processedContent: truncatedContent,
- truncated: true,
- originalTokens,
- processedTokens
- };
- }
-
- /**
- * 验证内容是否适合处理
- */
- static validateContent(content: string): Result {
- if (!content || content.trim().length === 0) {
- return err(new Error('内容不能为空'));
- }
-
- if (content.length < this.MIN_CONTENT_LENGTH) {
- return err(new Error(`内容长度至少需要 ${this.MIN_CONTENT_LENGTH} 个字符`));
- }
-
- return ok(undefined);
- }
-}
-
-/**
- * 主要的转换执行函数
- */
-export async function runTransformation(params: TransformationParams): Promise {
- const { content, transformationType, settings, model, maxContentTokens } = params;
-
- try {
- // 验证内容
- const contentValidation = DocumentProcessor.validateContent(content);
- if (contentValidation.isErr()) {
- return {
- success: false,
- error: contentValidation.error.message
- };
- }
-
- // 获取转换配置
- const transformationConfig = TRANSFORMATIONS[transformationType];
- if (!transformationConfig) {
- return {
- success: false,
- error: `不支持的转换类型: ${transformationType}`
- };
- }
-
- // 处理文档内容(检查 token 数量并截断)
- const tokenLimit = maxContentTokens || DocumentProcessor['DEFAULT_MAX_TOKENS'];
- const processedDocument = await DocumentProcessor.processContent(content, tokenLimit);
-
- // 使用默认模型或传入的模型
- const llmModel: LLMModel = model || {
- provider: settings.applyModelProvider,
- modelId: settings.applyModelId,
- };
-
- // 创建 LLM 管理器和客户端
- const llmManager = new LLMManager(settings);
- const client = new TransformationLLMClient(llmManager, llmModel);
-
- // 构建请求消息
- const messages: RequestMessage[] = [
- {
- role: 'system',
- content: transformationConfig.prompt
- },
- {
- role: 'user',
- content: processedDocument.processedContent
- }
- ];
-
- // 调用 LLM 执行转换
- const result = await client.queryChatModel(messages);
-
- if (result.isErr()) {
- return {
- success: false,
- error: `LLM 调用失败: ${result.error.message}`,
- truncated: processedDocument.truncated,
- originalTokens: processedDocument.originalTokens,
- processedTokens: processedDocument.processedTokens
- };
- }
-
- // 后处理结果
- const processedResult = postProcessResult(result.value, transformationType);
-
- return {
- success: true,
- result: processedResult,
- truncated: processedDocument.truncated,
- originalTokens: processedDocument.originalTokens,
- processedTokens: processedDocument.processedTokens
- };
-
- } catch (error) {
- return {
- success: false,
- error: `转换过程中出现错误: ${error instanceof Error ? error.message : String(error)}`
- };
- }
-}
-
-/**
- * 后处理转换结果
- */
-function postProcessResult(result: string, transformationType: TransformationType): string {
- let processed = result.trim();
-
- // 移除可能的 markdown 代码块标记
- processed = processed.replace(/^```[\w]*\n/, '').replace(/\n```$/, '');
-
- // 根据转换类型进行特定的后处理
- switch (transformationType) {
- case TransformationType.KEY_INSIGHTS:
- // 确保 insights 格式正确
- if (!processed.includes('INSIGHTS')) {
- processed = `# INSIGHTS\n\n${processed}`;
- }
- break;
-
- case TransformationType.REFLECTIONS:
- // 确保 reflections 格式正确
- if (!processed.includes('REFLECTIONS')) {
- processed = `# REFLECTIONS\n\n${processed}`;
- }
- break;
-
- case TransformationType.ANALYZE_PAPER: {
- // 确保论文分析包含所有必需的部分
- const requiredSections = ['PURPOSE', 'CONTRIBUTION', 'KEY FINDINGS', 'IMPLICATIONS', 'LIMITATIONS'];
- const hasAllSections = requiredSections.every(section =>
- processed.toUpperCase().includes(section)
- );
-
- if (!hasAllSections) {
- // 如果缺少某些部分,添加提示
- processed += '\n\n*注意:某些分析部分可能不完整,建议重新处理或检查原始内容。*';
- }
- break;
- }
- }
-
- return processed;
-}
-
-/**
- * 批量执行转换
- */
-export async function runBatchTransformations(
- content: string,
- transformationTypes: TransformationType[],
- settings: InfioSettings,
- model?: LLMModel
-): Promise> {
- const results: Record = {};
-
- // 并行执行所有转换
- const promises = transformationTypes.map(async (type) => {
- const result = await runTransformation({
- content,
- transformationType: type,
- settings,
- model
- });
- return { type, result };
- });
-
- const completedResults = await Promise.all(promises);
-
- for (const { type, result } of completedResults) {
- results[type] = result;
- }
-
- return results;
-}
-
-/**
- * 获取所有可用的转换类型和描述
- */
-export function getAvailableTransformations(): Array<{ type: TransformationType, description: string }> {
- return Object.values(TRANSFORMATIONS).map(config => ({
- type: config.type,
- description: config.description
- }));
-}
diff --git a/src/core/transformations/trans-engine.ts b/src/core/transformations/trans-engine.ts
new file mode 100644
index 0000000..8fcb8c4
--- /dev/null
+++ b/src/core/transformations/trans-engine.ts
@@ -0,0 +1,683 @@
+import { Result, err, ok } from "neverthrow";
+import { App } from 'obsidian';
+
+import { DBManager } from '../../database/database-manager';
+import { InsightManager } from '../../database/modules/insight/insight-manager';
+import { EmbeddingModel } from '../../types/embedding';
+import { LLMModel } from '../../types/llm/model';
+import { RequestMessage } from '../../types/llm/request';
+import { InfioSettings } from '../../types/settings';
+import { readTFileContentPdf } from '../../utils/obsidian';
+import { tokenCount } from '../../utils/token';
+import LLMManager from '../llm/manager';
+import { ANALYZE_PAPER_DESCRIPTION, ANALYZE_PAPER_PROMPT } from '../prompts/transformations/analyze-paper';
+import { DENSE_SUMMARY_DESCRIPTION, DENSE_SUMMARY_PROMPT } from '../prompts/transformations/dense-summary';
+import { KEY_INSIGHTS_DESCRIPTION, KEY_INSIGHTS_PROMPT } from '../prompts/transformations/key-insights';
+import { REFLECTIONS_DESCRIPTION, REFLECTIONS_PROMPT } from '../prompts/transformations/reflections';
+import { SIMPLE_SUMMARY_DESCRIPTION, SIMPLE_SUMMARY_PROMPT } from '../prompts/transformations/simple-summary';
+import { TABLE_OF_CONTENTS_DESCRIPTION, TABLE_OF_CONTENTS_PROMPT } from '../prompts/transformations/table-of-contents';
+import { getEmbeddingModel } from '../rag/embedding';
+
+// Enumeration of the supported transformation types
+export enum TransformationType {
+	DENSE_SUMMARY = 'dense_summary',
+	ANALYZE_PAPER = 'analyze_paper',
+	SIMPLE_SUMMARY = 'simple_summary',
+	KEY_INSIGHTS = 'key_insights',
+	TABLE_OF_CONTENTS = 'table_of_contents',
+	REFLECTIONS = 'reflections'
+}
+
+// Configuration record describing a single transformation
+export interface TransformationConfig {
+	type: TransformationType;
+	prompt: string; // sent as the system message when the transformation runs
+	description: string;
+	maxTokens?: number; // NOTE(review): not read anywhere in this file — confirm intended use
+}
+
+// All available transformation configurations, keyed by transformation type
+export const TRANSFORMATIONS: Record<TransformationType, TransformationConfig> = {
+	[TransformationType.DENSE_SUMMARY]: {
+		type: TransformationType.DENSE_SUMMARY,
+		prompt: DENSE_SUMMARY_PROMPT,
+		description: DENSE_SUMMARY_DESCRIPTION,
+		maxTokens: 4000
+	},
+	[TransformationType.ANALYZE_PAPER]: {
+		type: TransformationType.ANALYZE_PAPER,
+		prompt: ANALYZE_PAPER_PROMPT,
+		description: ANALYZE_PAPER_DESCRIPTION,
+		maxTokens: 3000
+	},
+	[TransformationType.SIMPLE_SUMMARY]: {
+		type: TransformationType.SIMPLE_SUMMARY,
+		prompt: SIMPLE_SUMMARY_PROMPT,
+		description: SIMPLE_SUMMARY_DESCRIPTION,
+		maxTokens: 2000
+	},
+	[TransformationType.KEY_INSIGHTS]: {
+		type: TransformationType.KEY_INSIGHTS,
+		prompt: KEY_INSIGHTS_PROMPT,
+		description: KEY_INSIGHTS_DESCRIPTION,
+		maxTokens: 3000
+	},
+	[TransformationType.TABLE_OF_CONTENTS]: {
+		type: TransformationType.TABLE_OF_CONTENTS,
+		prompt: TABLE_OF_CONTENTS_PROMPT,
+		description: TABLE_OF_CONTENTS_DESCRIPTION,
+		maxTokens: 2000
+	},
+	[TransformationType.REFLECTIONS]: {
+		type: TransformationType.REFLECTIONS,
+		prompt: REFLECTIONS_PROMPT,
+		description: REFLECTIONS_DESCRIPTION,
+		maxTokens: 2500
+	}
+};
+
+// Parameters accepted by TransEngine.runTransformation
+export interface TransformationParams {
+	filePath: string; // required: path of the file to transform
+	contentType?: 'document' | 'tag' | 'folder'; // defaults to 'document'
+	transformationType: TransformationType;
+	model?: LLMModel; // falls back to the apply model from settings
+	maxContentTokens?: number; // falls back to DocumentProcessor's default limit when unset or 0
+	saveToDatabase?: boolean; // defaults to false
+}
+
+// Result returned by a transformation run
+export interface TransformationResult {
+	success: boolean;
+	result?: string;
+	error?: string;
+	truncated?: boolean; // true when the input was cut down to fit the token limit
+	originalTokens?: number;
+	processedTokens?: number;
+}
+
+/**
+ * LLM client used to send chat messages to a language model and collect the streamed reply
+ */
+class TransformationLLMClient {
+	private llm: LLMManager;
+	private model: LLMModel;
+
+	constructor(llm: LLMManager, model: LLMModel) {
+		this.llm = llm;
+		this.model = model;
+	}
+
+	async queryChatModel(messages: RequestMessage[]): Promise<Result<string, Error>> { // never rejects: failures come back as err(Error)
+		try {
+			const stream = await this.llm.streamResponse(
+				this.model,
+				{
+					messages: messages,
+					model: this.model.modelId,
+					stream: true,
+				}
+			);
+
+			let response_content = "";
+			for await (const chunk of stream) { // concatenate the streamed deltas into one string
+				const content = chunk.choices[0]?.delta?.content ?? '';
+				response_content += content;
+			}
+			return ok(response_content);
+		} catch (error) {
+			return err(error instanceof Error ? error : new Error(String(error))); // wrap non-Error throwables
+		}
+	}
+}
+
+/**
+ * Document content processing: content validation and token-based truncation
+ */
+class DocumentProcessor {
+	private static readonly DEFAULT_MAX_TOKENS = 12000; // default maximum number of tokens
+	private static readonly MIN_CONTENT_LENGTH = 100; // minimum content length (in characters)
+
+	/**
+	 * Check the document's token count and truncate it when it exceeds maxTokens
+	 */
+	static async processContent(content: string, maxTokens: number = this.DEFAULT_MAX_TOKENS): Promise<{
+		processedContent: string;
+		truncated: boolean;
+		originalTokens: number;
+		processedTokens: number;
+	}> {
+		const originalTokens = await tokenCount(content);
+
+		if (originalTokens <= maxTokens) {
+			return {
+				processedContent: content,
+				truncated: false,
+				originalTokens,
+				processedTokens: originalTokens
+			};
+		}
+
+		// Smart truncation: driven by the token count and by content boundaries
+		// First estimate the cut position from the characters-per-token ratio
+		const estimatedCharRatio = content.length / originalTokens;
+		const estimatedCharLimit = Math.floor(maxTokens * estimatedCharRatio * 0.9); // leave some headroom
+
+		let truncatedContent = content.substring(0, estimatedCharLimit);
+
+		// Find the end of the last complete sentence (ASCII and CJK full-width terminators)
+		const lastSentenceEnd = Math.max(
+			truncatedContent.lastIndexOf('.'),
+			truncatedContent.lastIndexOf('!'),
+			truncatedContent.lastIndexOf('?'),
+			truncatedContent.lastIndexOf('。'),
+			truncatedContent.lastIndexOf('！'),
+			truncatedContent.lastIndexOf('？')
+		);
+
+		// Find the end of the last paragraph
+		const lastParagraphEnd = truncatedContent.lastIndexOf('\n\n');
+
+		// Pick the later of the two boundaries as the cut position
+		const cutoffPosition = Math.max(lastSentenceEnd, lastParagraphEnd);
+
+		if (cutoffPosition > estimatedCharLimit * 0.8) { // only snap to the boundary if it does not drop too much content
+			truncatedContent = content.substring(0, cutoffPosition + 1);
+		}
+
+		// Make sure the truncated content is not too short
+		if (truncatedContent.length < this.MIN_CONTENT_LENGTH) {
+			// Fall back to a safe length derived from the character ratio
+			const safeCharLimit = Math.max(this.MIN_CONTENT_LENGTH, Math.floor(maxTokens * estimatedCharRatio * 0.8));
+			truncatedContent = content.substring(0, Math.min(safeCharLimit, content.length));
+		}
+
+		// Verify the resulting token count
+		const finalTokens = await tokenCount(truncatedContent);
+
+		// If it is still over the limit, cut again using the ratio measured on the truncated text
+		if (finalTokens > maxTokens) {
+			const adjustedRatio = truncatedContent.length / finalTokens;
+			const adjustedCharLimit = Math.floor(maxTokens * adjustedRatio);
+			truncatedContent = content.substring(0, adjustedCharLimit);
+		}
+
+		const processedTokens = await tokenCount(truncatedContent);
+
+		return {
+			processedContent: truncatedContent,
+			truncated: true,
+			originalTokens,
+			processedTokens
+		};
+	}
+
+	/**
+	 * Validate that the content is suitable for processing (non-empty and at least MIN_CONTENT_LENGTH characters)
+	 */
+	static validateContent(content: string): Result<void, Error> {
+		if (!content || content.trim().length === 0) {
+			return err(new Error('内容不能为空'));
+		}
+
+		if (content.length < this.MIN_CONTENT_LENGTH) {
+			return err(new Error(`内容长度至少需要 ${this.MIN_CONTENT_LENGTH} 个字符`));
+		}
+
+		return ok(undefined);
+	}
+}
+
+/**
+ * Transformation engine: runs LLM transformations over vault files and can cache the results as insights
+ */
+export class TransEngine {
+ private app: App;
+ private settings: InfioSettings;
+ private llmManager: LLMManager;
+ private insightManager: InsightManager | null = null;
+ private embeddingModel: EmbeddingModel | null = null;
+
+	constructor( // wires up the LLM manager, the insight manager and (optionally) the embedding model
+		app: App,
+		settings: InfioSettings,
+		dbManager: DBManager,
+	) {
+		this.app = app;
+		this.settings = settings;
+		this.llmManager = new LLMManager(settings);
+		this.insightManager = dbManager.getInsightManager();
+
+		// Initialize the embedding model; it stays null when no model id is configured or creation fails
+		if (settings.embeddingModelId && settings.embeddingModelId.trim() !== '') {
+			try {
+				this.embeddingModel = getEmbeddingModel(settings);
+			} catch (error) {
+				console.warn('Failed to initialize embedding model:', error);
+				this.embeddingModel = null;
+			}
+		} else {
+			this.embeddingModel = null;
+		}
+	}
+
+	cleanup() { // drops the embedding model and insight manager references; cache lookup and saving are skipped afterwards
+		this.embeddingModel = null;
+		this.insightManager = null;
+	}
+
+	setSettings(settings: InfioSettings) { // applies new settings: rebuilds the LLM manager and the embedding model
+		this.settings = settings;
+		this.llmManager = new LLMManager(settings);
+
+		// Re-initialize the embedding model; it becomes null when no model id is configured or creation fails
+		if (settings.embeddingModelId && settings.embeddingModelId.trim() !== '') {
+			try {
+				this.embeddingModel = getEmbeddingModel(settings);
+			} catch (error) {
+				console.warn('Failed to initialize embedding model:', error);
+				this.embeddingModel = null;
+			}
+		} else {
+			this.embeddingModel = null;
+		}
+	}
+
+	/**
+	 * Look up a vault file by path and return its path and modification time, or an error when it does not exist
+	 */
+	private async getFileMetadata(filePath: string): Promise<
+		| {
+			success: true;
+			fileExists: true;
+			sourcePath: string;
+			sourceMtime: number;
+		}
+		| {
+			success: false;
+			error: string;
+		}
+	> {
+		const targetFile = this.app.vault.getFileByPath(filePath);
+		if (!targetFile) {
+			return {
+				success: false,
+				error: `文件不存在: ${filePath}`
+			};
+		}
+
+		return {
+			success: true,
+			fileExists: true,
+			sourcePath: filePath,
+			sourceMtime: targetFile.stat.mtime
+		};
+	}
+
+	/**
+	 * Look for a stored insight matching this source path, modification time and transformation type
+	 */
+	private async checkDatabaseCache(
+		sourcePath: string,
+		sourceMtime: number,
+		transformationType: TransformationType
+	): Promise<
+		| {
+			success: true;
+			foundCache: true;
+			result: TransformationResult;
+		}
+		| {
+			success: true;
+			foundCache: false;
+		}
+	> {
+		// Skip the cache lookup when the embedding model or the insight manager is unavailable
+		if (!this.embeddingModel || !this.insightManager) {
+			console.log("no embeddingModel or insightManager");
+			return {
+				success: true,
+				foundCache: false
+			};
+		}
+
+		try {
+			const existingInsights = await this.insightManager.getInsightsBySourcePath(sourcePath, this.embeddingModel);
+			console.log("existingInsights", existingInsights); // NOTE(review): debug logging of every stored insight row — consider removing
+
+			// Find an insight with the same transformation type and the same source modification time
+			const matchingInsight = existingInsights.find(insight =>
+				insight.insight_type === transformationType &&
+				insight.source_mtime === sourceMtime
+			);
+
+			if (matchingInsight) {
+				// A matching cached result exists: return it directly
+				console.log(`使用缓存的转换结果: ${transformationType} for ${sourcePath}`);
+				return {
+					success: true,
+					foundCache: true,
+					result: {
+						success: true,
+						result: matchingInsight.insight,
+						truncated: false, // cached results are reported as not truncated
+						originalTokens: 0, // token counts are not tracked for cached results
+						processedTokens: 0
+					}
+				};
+			}
+
+			return {
+				success: true,
+				foundCache: false
+			};
+		} catch (cacheError) {
+			console.warn('查询缓存失败,继续执行转换:', cacheError);
+			// A failed cache lookup must not break the main flow: report a cache miss instead
+			return {
+				success: true,
+				foundCache: false
+			};
+		}
+	}
+
+	/**
+	 * Read the content of a vault file via readTFileContentPdf; returns an error result when the file is missing or unreadable
+	 */
+	private async getFileContent(filePath: string): Promise<
+		| {
+			success: true;
+			fileContent: string;
+		}
+		| {
+			success: false;
+			error: string;
+		}
+	> {
+		const targetFile = this.app.vault.getFileByPath(filePath);
+		if (!targetFile) {
+			return {
+				success: false,
+				error: `文件不存在: ${filePath}`
+			};
+		}
+
+		try {
+			const fileContent = await readTFileContentPdf(targetFile, this.app.vault, this.app);
+			return {
+				success: true,
+				fileContent
+			};
+		} catch (error) {
+			return {
+				success: false,
+				error: `读取文件失败: ${error instanceof Error ? error.message : String(error)}`
+			};
+		}
+	}
+
+	/**
+	 * Embed a transformation result and store it as an insight; a no-op without an embedding model or insight manager
+	 */
+	private async saveResultToDatabase(
+		result: string,
+		transformationType: TransformationType,
+		sourcePath: string,
+		sourceMtime: number,
+		contentType: string
+	): Promise<void> {
+		if (!this.embeddingModel || !this.insightManager) {
+			return;
+		}
+
+		try {
+			// Generate the embedding vector for the insight text
+			const insightEmbedding = await this.embeddingModel.getEmbedding(result);
+
+			// Persist the insight to the database
+			await this.insightManager.storeInsight(
+				{
+					insightType: transformationType,
+					insight: result,
+					sourceType: contentType,
+					sourcePath: sourcePath,
+					sourceMtime: sourceMtime,
+					embedding: insightEmbedding,
+				},
+				this.embeddingModel
+			);
+
+			console.log(`转换结果已成功保存到数据库: ${transformationType} for ${sourcePath}`);
+		} catch (dbError) {
+			console.warn('保存洞察到数据库失败:', dbError);
+			// Failures here are only logged so they never affect the transformation result itself
+		}
+	}
+
+	/**
+	 * Main entry point: run one transformation on a file and return a TransformationResult (errors are reported, not thrown)
+	 */
+	async runTransformation(params: TransformationParams): Promise<TransformationResult> {
+		console.log("runTransformation", params);
+		const {
+			filePath,
+			contentType = 'document',
+			transformationType,
+			model,
+			maxContentTokens,
+			saveToDatabase = false
+		} = params;
+
+		try {
+			// Step 1: fetch the file metadata
+			const metadataResult = await this.getFileMetadata(filePath);
+
+			if (!metadataResult.success) {
+				return {
+					success: false,
+					error: metadataResult.error
+				};
+			}
+
+			// From here on TypeScript knows metadataResult.success is true
+			const { sourcePath, sourceMtime } = metadataResult;
+
+			// Step 2: check the database cache
+			const cacheCheckResult = await this.checkDatabaseCache(
+				sourcePath,
+				sourceMtime,
+				transformationType
+			);
+
+			if (cacheCheckResult.foundCache) {
+				return cacheCheckResult.result;
+			}
+
+			// Step 3: read the file content (only done on a cache miss)
+			const fileContentResult = await this.getFileContent(filePath);
+
+			if (!fileContentResult.success) {
+				return {
+					success: false,
+					error: fileContentResult.error
+				};
+			}
+
+			// From here on TypeScript knows fileContentResult.success is true
+			const { fileContent } = fileContentResult;
+
+			// Validate the content
+			const contentValidation = DocumentProcessor.validateContent(fileContent);
+			if (contentValidation.isErr()) {
+				return {
+					success: false,
+					error: contentValidation.error.message
+				};
+			}
+
+			// Look up the transformation configuration
+			const transformationConfig = TRANSFORMATIONS[transformationType];
+			if (!transformationConfig) {
+				return {
+					success: false,
+					error: `不支持的转换类型: ${transformationType}`
+				};
+			}
+
+			// Process the document content (count tokens and truncate if needed)
+			const tokenLimit = maxContentTokens || DocumentProcessor['DEFAULT_MAX_TOKENS'];
+			const processedDocument = await DocumentProcessor.processContent(fileContent, tokenLimit);
+
+			// Use the model passed in, or fall back to the apply model from settings
+			const llmModel: LLMModel = model || {
+				provider: this.settings.applyModelProvider,
+				modelId: this.settings.applyModelId,
+			};
+
+			// Create the LLM client
+			const client = new TransformationLLMClient(this.llmManager, llmModel);
+
+			// Build the request messages: transformation prompt as system message, document as user message
+			const messages: RequestMessage[] = [
+				{
+					role: 'system',
+					content: transformationConfig.prompt
+				},
+				{
+					role: 'user',
+					content: processedDocument.processedContent
+				}
+			];
+
+			// Call the LLM to perform the transformation
+			const result = await client.queryChatModel(messages);
+
+			if (result.isErr()) {
+				return {
+					success: false,
+					error: `LLM 调用失败: ${result.error.message}`,
+					truncated: processedDocument.truncated,
+					originalTokens: processedDocument.originalTokens,
+					processedTokens: processedDocument.processedTokens
+				};
+			}
+
+			// Post-process the result
+			const processedResult = this.postProcessResult(result.value, transformationType);
+
+			// Save the result to the database as a background task that does not block the main flow
+			if (saveToDatabase) {
+				// Deliberately not awaited; `void` marks the promise as intentionally discarded
+				void (async () => {
+					await this.saveResultToDatabase(
+						processedResult,
+						transformationType,
+						sourcePath,
+						sourceMtime,
+						contentType
+					);
+				})(); // starts immediately; saveResultToDatabase catches and logs its own errors
+			}
+
+			return {
+				success: true,
+				result: processedResult,
+				truncated: processedDocument.truncated,
+				originalTokens: processedDocument.originalTokens,
+				processedTokens: processedDocument.processedTokens
+			};
+
+		} catch (error) {
+			return {
+				success: false,
+				error: `转换过程中出现错误: ${error instanceof Error ? error.message : String(error)}`
+			};
+		}
+	}
+
+	/**
+	 * Post-process the raw LLM output: trim it, strip a wrapping code fence and apply per-type fixes
+	 */
+	private postProcessResult(result: string, transformationType: TransformationType): string {
+		let processed = result.trim();
+
+		// Remove a possible leading/trailing markdown code-fence marker
+		processed = processed.replace(/^```[\w]*\n/, '').replace(/\n```$/, '');
+
+		// Type-specific post-processing
+		switch (transformationType) {
+			case TransformationType.KEY_INSIGHTS:
+				// Ensure the output carries an INSIGHTS heading
+				if (!processed.includes('INSIGHTS')) {
+					processed = `# INSIGHTS\n\n${processed}`;
+				}
+				break;
+
+			case TransformationType.REFLECTIONS:
+				// Ensure the output carries a REFLECTIONS heading
+				if (!processed.includes('REFLECTIONS')) {
+					processed = `# REFLECTIONS\n\n${processed}`;
+				}
+				break;
+
+			case TransformationType.ANALYZE_PAPER: {
+				// Check that the paper analysis mentions every required section
+				const requiredSections = ['PURPOSE', 'CONTRIBUTION', 'KEY FINDINGS', 'IMPLICATIONS', 'LIMITATIONS'];
+				const hasAllSections = requiredSections.every(section =>
+					processed.toUpperCase().includes(section)
+				);
+
+				if (!hasAllSections) {
+					// Some sections are missing: append a notice to the output
+					processed += '\n\n*注意:某些分析部分可能不完整,建议重新处理或检查原始内容。*';
+				}
+				break;
+			}
+		}
+
+		return processed;
+	}
+
+	/**
+	 * Run several transformations on the same file and return the results keyed by transformation type
+	 */
+	async runBatchTransformations(
+		filePath: string,
+		transformationTypes: TransformationType[],
+		options?: {
+			model?: LLMModel;
+			saveToDatabase?: boolean;
+		}
+	): Promise<Record<string, TransformationResult>> {
+		const results: Record<string, TransformationResult> = {};
+
+		// Run all transformations in parallel
+		const promises = transformationTypes.map(async (type) => {
+			const result = await this.runTransformation({
+				filePath: filePath,
+				transformationType: type,
+				model: options?.model,
+				saveToDatabase: options?.saveToDatabase
+			});
+			return { type, result };
+		});
+
+		const completedResults = await Promise.all(promises);
+
+		for (const { type, result } of completedResults) {
+			results[type] = result;
+		}
+
+		return results;
+	}
+
+	/**
+	 * List every available transformation type together with its description
+	 */
+	static getAvailableTransformations(): Array<{ type: TransformationType, description: string }> {
+		return Object.values(TRANSFORMATIONS).map(config => ({
+			type: config.type,
+			description: config.description
+		}));
+	}
+}
diff --git a/src/core/transformations/usage-example.ts b/src/core/transformations/usage-example.ts
deleted file mode 100644
index db86ff2..0000000
--- a/src/core/transformations/usage-example.ts
+++ /dev/null
@@ -1,181 +0,0 @@
-import { InfioSettings } from '../../types/settings';
-
-import {
- TransformationType,
- getAvailableTransformations,
- runBatchTransformations,
- runTransformation,
-} from './run_trans';
-
-/**
- * 使用示例:单个转换
- */
-export async function exampleSingleTransformation(settings: InfioSettings) {
- const sampleContent = `
- 人工智能技术正在快速发展,特别是大型语言模型的出现,彻底改变了我们与计算机交互的方式。
- 这些模型能够理解和生成人类语言,在多个领域展现出令人印象深刻的能力。
-
- 然而,随着AI技术的普及,我们也面临着新的挑战,包括伦理问题、隐私保护、
- 以及如何确保AI技术的安全和可控发展。这些问题需要全社会的共同关注和努力。
-
- 未来,人工智能将继续在教育、医疗、商业等领域发挥重要作用,
- 但我们必须在推进技术发展的同时,确保技术服务于人类的福祉。
- `;
-
- try {
- // 执行简单摘要转换
- const result = await runTransformation({
- content: sampleContent,
- transformationType: TransformationType.SIMPLE_SUMMARY,
- settings: settings
- });
-
- if (result.success) {
- console.log('转换成功!');
- console.log('结果:', result.result);
-
- if (result.truncated) {
- console.log(`注意:内容被截断 (${result.originalLength} -> ${result.processedLength} 字符)`);
- }
- } else {
- console.error('转换失败:', result.error);
- }
-
- return result;
- } catch (error) {
- console.error('执行转换时出错:', error);
- throw error;
- }
-}
-
-/**
- * 使用示例:批量转换
- */
-export async function exampleBatchTransformations(settings: InfioSettings) {
- const sampleContent = `
- 区块链技术作为一种分布式账本技术,具有去中心化、不可篡改、透明公开等特点。
- 它最初是为比特币而设计的底层技术,但现在已经扩展到各个行业和应用场景。
-
- 在金融领域,区块链可以用于跨境支付、供应链金融、数字货币等;
- 在供应链管理中,它能够提供产品溯源和防伪验证;
- 在数字身份认证方面,区块链可以建立更安全可靠的身份管理系统。
-
- 尽管区块链技术有很多优势,但它也面临着可扩展性、能耗、监管等挑战。
- 随着技术的不断成熟和完善,相信这些问题会逐步得到解决。
- 区块链技术的未来发展值得期待,它将为数字经济的发展提供重要的技术支撑。
- `;
-
- try {
- // 同时执行多种转换
- const transformationTypes = [
- TransformationType.SIMPLE_SUMMARY,
- TransformationType.KEY_INSIGHTS,
- TransformationType.TABLE_OF_CONTENTS
- ];
-
- const results = await runBatchTransformations(
- sampleContent,
- transformationTypes,
- settings
- );
-
- console.log('批量转换完成!');
-
- for (const [type, result] of Object.entries(results)) {
- console.log(`\n=== ${type.toUpperCase()} ===`);
- if (result.success) {
- console.log(result.result);
- } else {
- console.error('失败:', result.error);
- }
- }
-
- return results;
- } catch (error) {
- console.error('执行批量转换时出错:', error);
- throw error;
- }
-}
-
-/**
- * 使用示例:处理长文档(会被截断)
- */
-export async function exampleLongDocumentProcessing(settings: InfioSettings) {
- // 模拟一个很长的文档
- const longContent = '这是一个很长的文档内容。'.repeat(10000); // 约50万字符
-
- try {
- const result = await runTransformation({
- content: longContent,
- transformationType: TransformationType.DENSE_SUMMARY,
- settings: settings,
- maxContentLength: 30000 // 设置最大内容长度
- });
-
- if (result.success) {
- console.log('长文档转换成功!');
- console.log('原始长度:', result.originalLength);
- console.log('处理后长度:', result.processedLength);
- console.log('是否被截断:', result.truncated);
- console.log('结果长度:', result.result?.length);
- } else {
- console.error('转换失败:', result.error);
- }
-
- return result;
- } catch (error) {
- console.error('处理长文档时出错:', error);
- throw error;
- }
-}
-
-/**
- * 使用示例:获取所有可用的转换类型
- */
-export function exampleGetAvailableTransformations() {
- const availableTransformations = getAvailableTransformations();
-
- console.log('可用的转换类型:');
- availableTransformations.forEach((transformation, index) => {
- console.log(`${index + 1}. ${transformation.type}: ${transformation.description}`);
- });
-
- return availableTransformations;
-}
-
-/**
- * 使用示例:错误处理
- */
-export async function exampleErrorHandling(settings: InfioSettings) {
- try {
- // 测试空内容
- const emptyResult = await runTransformation({
- content: '',
- transformationType: TransformationType.SIMPLE_SUMMARY,
- settings: settings
- });
-
- console.log('空内容测试:', emptyResult);
-
- // 测试太短的内容
- const shortResult = await runTransformation({
- content: '太短',
- transformationType: TransformationType.SIMPLE_SUMMARY,
- settings: settings
- });
-
- console.log('短内容测试:', shortResult);
-
- // 测试无效的转换类型(需要类型断言来测试)
- const invalidResult = await runTransformation({
- content: '这是一些测试内容,用于测试无效的转换类型处理。',
- transformationType: 'invalid-type' as TransformationType,
- settings: settings
- });
-
- console.log('无效类型测试:', invalidResult);
-
- } catch (error) {
- console.error('错误处理测试时出错:', error);
- }
-}
diff --git a/src/database/modules/insight/insight-manager.ts b/src/database/modules/insight/insight-manager.ts
index 18d1f2c..c938563 100644
--- a/src/database/modules/insight/insight-manager.ts
+++ b/src/database/modules/insight/insight-manager.ts
@@ -1,8 +1,8 @@
import { App, TFile } from 'obsidian'
-import { InsertSourceInsight, SelectSourceInsight } from '../../schema'
import { EmbeddingModel } from '../../../types/embedding'
import { DBManager } from '../../database-manager'
+import { InsertSourceInsight, SelectSourceInsight } from '../../schema'
import { InsightRepository } from './insight-repository'
@@ -51,6 +51,7 @@ export class InsightManager {
insight: string
sourceType: 'document' | 'tag' | 'folder'
sourcePath: string
+ sourceMtime: number
embedding: number[]
},
embeddingModel: EmbeddingModel,
@@ -60,6 +61,7 @@ export class InsightManager {
insight: insightData.insight,
source_type: insightData.sourceType,
source_path: insightData.sourcePath,
+ source_mtime: insightData.sourceMtime,
embedding: insightData.embedding,
}
@@ -75,6 +77,7 @@ export class InsightManager {
insight: string
sourceType: 'document' | 'tag' | 'folder'
sourcePath: string
+ sourceMtime: number
embedding: number[]
}>,
embeddingModel: EmbeddingModel,
@@ -84,6 +87,7 @@ export class InsightManager {
insight: data.insight,
source_type: data.sourceType,
source_path: data.sourcePath,
+ source_mtime: data.sourceMtime,
embedding: data.embedding,
}))
@@ -100,6 +104,7 @@ export class InsightManager {
insight?: string
sourceType?: 'document' | 'tag' | 'folder'
sourcePath?: string
+ sourceMtime?: number
embedding?: number[]
},
embeddingModel: EmbeddingModel,
@@ -118,6 +123,9 @@ export class InsightManager {
if (updates.sourcePath !== undefined) {
updateData.source_path = updates.sourcePath
}
+ if (updates.sourceMtime !== undefined) {
+ updateData.source_mtime = updates.sourceMtime
+ }
if (updates.embedding !== undefined) {
updateData.embedding = updates.embedding
}
@@ -318,4 +326,26 @@ export class InsightManager {
return filteredInsights
}
+
+ // /**
+ // * Get insights whose source-file modification time falls within a range
+ // */
+ // async getInsightsByMtimeRange(
+ // minMtime: number,
+ // maxMtime: number,
+ // embeddingModel: EmbeddingModel,
+ // ): Promise {
+ // return await this.repository.getInsightsByMtimeRange(minMtime, maxMtime, embeddingModel)
+ // }
+
+ // /**
+ // * Get insights that need updating, based on the source file's modification time
+ // */
+ // async getOutdatedInsights(
+ // sourcePath: string,
+ // currentMtime: number,
+ // embeddingModel: EmbeddingModel,
+ // ): Promise {
+ // return await this.repository.getOutdatedInsights(sourcePath, currentMtime, embeddingModel)
+ // }
}
diff --git a/src/database/modules/insight/insight-repository.ts b/src/database/modules/insight/insight-repository.ts
index d3bb37e..5045cfa 100644
--- a/src/database/modules/insight/insight-repository.ts
+++ b/src/database/modules/insight/insight-repository.ts
@@ -139,8 +139,8 @@ export class InsightRepository {
// 构建批量插入的 SQL
const values = data.map((insight, index) => {
- const offset = index * 6
- return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5}, $${offset + 6})`
+ const offset = index * 7
+ return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5}, $${offset + 6}, $${offset + 7})`
}).join(',')
const params = data.flatMap(insight => [
@@ -148,12 +148,13 @@ export class InsightRepository {
insight.insight.replace(/\0/g, ''), // 清理null字节
insight.source_type,
insight.source_path,
+ insight.source_mtime,
`[${insight.embedding.join(',')}]`, // 转换为PostgreSQL vector格式
new Date() // updated_at
])
await this.db.query(
- `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, embedding, updated_at)
+ `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, source_mtime, embedding, updated_at)
VALUES ${values}`,
params
)
@@ -197,6 +198,12 @@ export class InsightRepository {
paramIndex++
}
+ if (data.source_mtime !== undefined) {
+ fields.push(`source_mtime = $${paramIndex}`)
+ params.push(data.source_mtime)
+ paramIndex++
+ }
+
if (data.embedding !== undefined) {
fields.push(`embedding = $${paramIndex}`)
params.push(`[${data.embedding.join(',')}]`)
@@ -235,7 +242,7 @@ export class InsightRepository {
}
const tableName = this.getTableName(embeddingModel)
- let whereConditions = ['1 - (embedding <=> $1::vector) > $2']
+ const whereConditions: string[] = ['1 - (embedding <=> $1::vector) > $2']
const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
let paramIndex = 4
@@ -259,7 +266,7 @@ export class InsightRepository {
const query = `
SELECT
- id, insight_type, insight, source_type, source_path, created_at, updated_at,
+ id, insight_type, insight, source_type, source_path, source_mtime, created_at, updated_at,
1 - (embedding <=> $1::vector) as similarity
FROM "${tableName}"
WHERE ${whereConditions.join(' AND ')}
@@ -271,4 +278,36 @@ export class InsightRepository {
const result = await this.db.query(query, params)
return result.rows
}
+
+ // async getInsightsByMtimeRange(
+ // minMtime: number,
+ // maxMtime: number,
+ // embeddingModel: EmbeddingModel,
+ // ): Promise {
+ // if (!this.db) {
+ // throw new DatabaseNotInitializedException()
+ // }
+ // const tableName = this.getTableName(embeddingModel)
+ // const result = await this.db.query(
+ // `SELECT * FROM "${tableName}" WHERE source_mtime >= $1 AND source_mtime <= $2 ORDER BY created_at DESC`,
+ // [minMtime, maxMtime]
+ // )
+ // return result.rows
+ // }
+
+ // async getOutdatedInsights(
+ // sourcePath: string,
+ // currentMtime: number,
+ // embeddingModel: EmbeddingModel,
+ // ): Promise {
+ // if (!this.db) {
+ // throw new DatabaseNotInitializedException()
+ // }
+ // const tableName = this.getTableName(embeddingModel)
+ // const result = await this.db.query(
+ // `SELECT * FROM "${tableName}" WHERE source_path = $1 AND source_mtime < $2 ORDER BY created_at DESC`,
+ // [sourcePath, currentMtime]
+ // )
+ // return result.rows
+ // }
}
diff --git a/src/database/schema.ts b/src/database/schema.ts
index 6d98ba0..e3140f0 100644
--- a/src/database/schema.ts
+++ b/src/database/schema.ts
@@ -1,7 +1,6 @@
import { SerializedLexicalNode } from 'lexical'
import { SUPPORT_EMBEDDING_SIMENTION } from '../constants'
-import { ApplyStatus } from '../types/apply'
// import { EmbeddingModelId } from '../types/embedding'
// PostgreSQL column types
@@ -184,6 +183,7 @@ export type SourceInsightRecord = {
insight: string
source_type: 'document' | 'tag' | 'folder'
source_path: string
+ source_mtime: number
embedding: number[]
created_at: Date
updated_at: Date
@@ -203,6 +203,7 @@ const createSourceInsightTable = (dimension: number): TableDefinition => {
insight: { type: 'TEXT', notNull: true },
source_type: { type: 'TEXT', notNull: true },
source_path: { type: 'TEXT', notNull: true },
+ source_mtime: { type: 'BIGINT', notNull: true },
embedding: { type: 'VECTOR', dimensions: dimension },
created_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true },
updated_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true }
diff --git a/src/database/sql.ts b/src/database/sql.ts
index c263baa..b088463 100644
--- a/src/database/sql.ts
+++ b/src/database/sql.ts
@@ -104,6 +104,7 @@ export const migrations: Record = {
"insight" text NOT NULL,
"source_type" text NOT NULL,
"source_path" text NOT NULL,
+ "source_mtime" bigint NOT NULL,
"embedding" vector(1536),
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
@@ -115,6 +116,7 @@ export const migrations: Record = {
"insight" text NOT NULL,
"source_type" text NOT NULL,
"source_path" text NOT NULL,
+ "source_mtime" bigint NOT NULL,
"embedding" vector(1024),
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
@@ -126,6 +128,7 @@ export const migrations: Record = {
"insight" text NOT NULL,
"source_type" text NOT NULL,
"source_path" text NOT NULL,
+ "source_mtime" bigint NOT NULL,
"embedding" vector(768),
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
@@ -137,6 +140,7 @@ export const migrations: Record = {
"insight" text NOT NULL,
"source_type" text NOT NULL,
"source_path" text NOT NULL,
+ "source_mtime" bigint NOT NULL,
"embedding" vector(512),
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
@@ -148,6 +152,7 @@ export const migrations: Record = {
"insight" text NOT NULL,
"source_type" text NOT NULL,
"source_path" text NOT NULL,
+ "source_mtime" bigint NOT NULL,
"embedding" vector(384),
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
@@ -245,5 +250,16 @@ export const migrations: Record = {
"created_at" timestamp DEFAULT now() NOT NULL
);
`
+ },
+ add_source_mtime: {
+ description: "Adds missing source_mtime column to existing source insight tables",
+ sql: `
+ -- Add source_mtime column to existing source insight tables if it doesn't exist
+ ALTER TABLE "source_insight_1536" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
+ ALTER TABLE "source_insight_1024" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
+ ALTER TABLE "source_insight_768" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
+ ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
+ ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
+ `
}
};
diff --git a/src/main.ts b/src/main.ts
index 3cd312f..ac3e980 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -11,6 +11,7 @@ import { getDiffStrategy } from "./core/diff/DiffStrategy"
import { InlineEdit } from './core/edit/inline-edit-processor'
import { McpHub } from './core/mcp/McpHub'
import { RAGEngine } from './core/rag/rag-engine'
+import { TransEngine } from './core/transformations/trans-engine'
import { DBManager } from './database/database-manager'
import { migrateToJsonDatabase } from './database/json/migrateToJsonDatabase'
import EventListener from "./event-listener"
@@ -41,6 +42,7 @@ export default class InfioPlugin extends Plugin {
private activeLeafChangeUnloadFn: (() => void) | null = null
private dbManagerInitPromise: Promise | null = null
private ragEngineInitPromise: Promise | null = null
+ private transEngineInitPromise: Promise | null = null
private mcpHubInitPromise: Promise | null = null
settings: InfioSettings
settingTab: InfioSettingTab
@@ -49,6 +51,7 @@ export default class InfioPlugin extends Plugin {
dbManager: DBManager | null = null
mcpHub: McpHub | null = null
ragEngine: RAGEngine | null = null
+ transEngine: TransEngine | null = null
inlineEdit: InlineEdit | null = null
diffStrategy?: DiffStrategy
dataviewManager: DataviewManager | null = null
@@ -422,10 +425,14 @@ export default class InfioPlugin extends Plugin {
// Promise cleanup
this.dbManagerInitPromise = null
this.ragEngineInitPromise = null
+ this.transEngineInitPromise = null
this.mcpHubInitPromise = null
// RagEngine cleanup
this.ragEngine?.cleanup()
this.ragEngine = null
+ // TransEngine cleanup
+ this.transEngine?.cleanup()
+ this.transEngine = null
// Database cleanup
this.dbManager?.cleanup()
this.dbManager = null
@@ -445,6 +452,7 @@ export default class InfioPlugin extends Plugin {
this.settings = newSettings
await this.saveData(newSettings)
this.ragEngine?.setSettings(newSettings)
+ this.transEngine?.setSettings(newSettings)
this.settingsListeners.forEach((listener) => listener(newSettings))
}
@@ -572,6 +580,23 @@ export default class InfioPlugin extends Plugin {
return this.ragEngineInitPromise
}
+ async getTransEngine(): Promise {
+ if (this.transEngine) {
+ return this.transEngine
+ }
+
+ if (!this.transEngineInitPromise) {
+ this.transEngineInitPromise = (async () => {
+ const dbManager = await this.getDbManager()
+ this.transEngine = new TransEngine(this.app, this.settings, dbManager)
+ return this.transEngine
+ })()
+ }
+
+ // Return the shared init promise so concurrent callers wait on one initialization instead of starting another. NOTE(review): this method never clears the promise, so a rejected init would keep being returned - confirm that is intended.
+ return this.transEngineInitPromise
+ }
+
private async migrateToJsonStorage() {
try {
const dbManager = await this.getDbManager()
diff --git a/src/utils/parse-infio-block.ts b/src/utils/parse-infio-block.ts
index 79df520..5901b0e 100644
--- a/src/utils/parse-infio-block.ts
+++ b/src/utils/parse-infio-block.ts
@@ -736,7 +736,7 @@ export function parseMsgBlocks(
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
// @ts-expect-error - parse5 node value type
path = childNode.childNodes[0].value
- } else if (childNode.nodeName === 'type' && childNode.childNodes.length > 0) {
+ } else if (childNode.nodeName === 'transformation' && childNode.childNodes.length > 0) {
// @ts-expect-error - parse5 node value type
transformation = childNode.childNodes[0].value
}