-		/<[^>]*\/>/g,
- () => ``
- ).trim();
+interface MarkdownWithIconsProps {
+ markdownContent: string;
+ finish: boolean;
+ className?: string;
+ iconName?: IconType;
+ iconSize?: number;
+ iconClassName?: string;
+}
- const components = {
- span: (props: ComponentPropsWithoutRef<'span'> & {
- 'data-icon'?: string;
- 'data-size'?: string;
- }) => {
- if (props['data-icon']) {
- const name = props['data-icon'];
- const size = props['data-size'] ? Number(props['data-size']) : 16;
- const className = props.className || '';
+const MarkdownWithIcons = ({
+ markdownContent,
+ finish,
+ className,
+ iconName,
+ iconSize = 14,
+ iconClassName = "infio-markdown-icon"
+}: MarkdownWithIconsProps) => {
+ // Handle icon rendering directly without string manipulation
+ const renderIcon = (): ReactNode => {
+ if (!iconName) return null;
- switch (name) {
- case 'ask_followup_question':
- return ;
- case 'attempt_completion':
- return ;
- default:
- return null;
- }
- }
- return ;
- },
+ switch (iconName) {
+ case 'ask_followup_question':
+ return ;
+ case 'attempt_completion':
+ return ;
+ default:
+ return null;
+ }
};
+ const renderTitle = (): ReactNode => {
+ if (!iconName) return null;
+
+ switch (iconName) {
+ case 'ask_followup_question':
+ return 'Ask Followup Question:';
+ case 'attempt_completion':
+ return 'Task Completion';
+ default:
+ return null;
+ }
+ };
+
+ // Render the title row and the markdown content
return (
<>
-
- {processedContent}
-
- {processedContent &&
+
+ {iconName && renderIcon()} {renderTitle()}
+
+ {markdownContent}
+
+
+ {markdownContent && finish &&
-
-
+
+
}
</>
-
);
};
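The icon elements inside `renderIcon` are elided above (the bare `return ;` lines). A minimal sketch of what the two branches might return, assuming lucide-react icons; the component names here are assumptions, only the `iconSize`/`iconClassName` wiring is given by the diff:

```tsx
import { CheckCircle, HelpCircle } from 'lucide-react'
import type { ReactNode } from 'react'

// Sketch only: HelpCircle / CheckCircle are assumed icon choices,
// not confirmed by the diff.
const renderIconSketch = (
	iconName: string | undefined,
	iconSize: number,
	iconClassName: string,
): ReactNode => {
	switch (iconName) {
		case 'ask_followup_question':
			return <HelpCircle size={iconSize} className={iconClassName} />
		case 'attempt_completion':
			return <CheckCircle size={iconSize} className={iconClassName} />
		default:
			return null
	}
}
```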
diff --git a/src/components/chat-view/ReactMarkdown.tsx b/src/components/chat-view/ReactMarkdown.tsx
index 3db924a..6a95486 100644
--- a/src/components/chat-view/ReactMarkdown.tsx
+++ b/src/components/chat-view/ReactMarkdown.tsx
@@ -123,16 +123,22 @@ function ReactMarkdown({
- ${block.result && block.result.trimStart()}`} />
+ markdownContent={block.result}
+ finish={block.finish}
+ iconName="attempt_completion"
+ iconSize={14}
+ iconClassName="infio-markdown-icon"
+ />
) : block.type === 'ask_followup_question' ? (
- ${block.question && block.question.trimStart()}`} />
+ markdownContent={block.question}
+ finish={block.finish}
+ iconName="ask_followup_question"
+ iconSize={14}
+ iconClassName="infio-markdown-icon"
+ />
) : block.type === 'switch_mode' ? (
Your search query here
-Examples:
+Examples1:
capital of France population statistics 2023
+Examples2:
"renewable energy" growth statistics Europe
+Examples3:
react vs angular vs vue.js comparison
`
diff --git a/src/core/rag/rag-engine.ts b/src/core/rag/rag-engine.ts
index 34e12a8..1e0d48a 100644
--- a/src/core/rag/rag-engine.ts
+++ b/src/core/rag/rag-engine.ts
@@ -1,4 +1,4 @@
-import { App } from 'obsidian'
+import { App, TFile } from 'obsidian'
import { QueryProgressState } from '../../components/chat-view/QueryProgress'
import { DBManager } from '../../database/database-manager'
@@ -13,7 +13,8 @@ export class RAGEngine {
private app: App
private settings: InfioSettings
private vectorManager: VectorManager
- private embeddingModel: EmbeddingModel | null = null
+ private embeddingModel: EmbeddingModel | null = null
+ private initialized = false
constructor(
app: App,
@@ -23,7 +24,7 @@ export class RAGEngine {
this.app = app
this.settings = settings
this.vectorManager = dbManager.getVectorManager()
- this.embeddingModel = getEmbeddingModel(settings)
+ this.embeddingModel = getEmbeddingModel(settings)
}
setSettings(settings: InfioSettings) {
@@ -34,16 +35,14 @@ export class RAGEngine {
// TODO: Implement automatic vault re-indexing when settings are changed.
// Currently, users must manually re-index the vault.
async updateVaultIndex(
- options: { reindexAll: boolean } = {
- reindexAll: false,
- },
+ options: { reindexAll: boolean },
onQueryProgressChange?: (queryProgress: QueryProgressState) => void,
- ): Promise<void> {
- if (!this.embeddingModel) {
- throw new Error('Embedding model is not set')
+ ): Promise<void> {
+ if (!this.embeddingModel) {
+ throw new Error('Embedding model is not set')
}
await this.vectorManager.updateVaultIndex(
- this.embeddingModel,
+ this.embeddingModel,
{
chunkSize: this.settings.ragOptions.chunkSize,
excludePatterns: this.settings.ragOptions.excludePatterns,
@@ -57,7 +56,23 @@ export class RAGEngine {
})
},
)
- }
+ this.initialized = true
+ }
+
+ async updateFileIndex(file: TFile) {
+ await this.vectorManager.UpdateFileVectorIndex(
+ this.embeddingModel,
+ this.settings.ragOptions.chunkSize,
+ file,
+ )
+ }
+
+ async deleteFileIndex(file: TFile) {
+ await this.vectorManager.DeleteFileVectorIndex(
+ this.embeddingModel,
+ file,
+ )
+ }
async processQuery({
query,
@@ -78,13 +93,19 @@ export class RAGEngine {
if (!this.embeddingModel) {
throw new Error('Embedding model is not set')
}
- // TODO: Decide the vault index update strategy.
- // Current approach: Update on every query.
- await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
+
+ if (!this.initialized) {
+ await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
+ }
const queryEmbedding = await this.getQueryEmbedding(query)
onQueryProgressChange?.({
type: 'querying',
- })
+ })
+ console.log('query, ', {
+ minSimilarity: this.settings.ragOptions.minSimilarity,
+ limit: this.settings.ragOptions.limit,
+ scope,
+ })
const queryResult = await this.vectorManager.performSimilaritySearch(
queryEmbedding,
this.embeddingModel,
@@ -93,7 +114,8 @@ export class RAGEngine {
limit: this.settings.ragOptions.limit,
scope,
},
- )
+ )
+ console.log('queryResult', queryResult)
onQueryProgressChange?.({
type: 'querying-done',
queryResult,
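Taken together, these rag-engine changes move indexing from eager-per-query to lazy-once: the first `processQuery` after startup triggers a single incremental vault pass (guarded by `initialized`), and the new `updateFileIndex`/`deleteFileIndex` methods keep the index fresh afterwards. A sketch of the intended wiring inside `Plugin.onload`, using only names from this diff (the real registration appears in the `src/main.ts` hunk below):

```ts
// Incremental index maintenance instead of re-scanning on every query.
this.registerEvent(
	this.app.metadataCache.on('changed', (file: TFile) => {
		this.ragEngine?.updateFileIndex(file) // re-embed just this file
	}),
)
this.registerEvent(
	this.app.metadataCache.on('deleted', (file: TFile) => {
		this.ragEngine?.deleteFileIndex(file) // drop its vectors
	}),
)
```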
diff --git a/src/database/database-manager.ts b/src/database/database-manager.ts
index 2bb8a6d..454cc03 100644
--- a/src/database/database-manager.ts
+++ b/src/database/database-manager.ts
@@ -1,38 +1,33 @@
-import { PGlite } from '@electric-sql/pglite'
// @ts-expect-error
-import { type PGliteWithLive, live } from '@electric-sql/pglite/live'
-import { App, normalizePath } from 'obsidian'
+import { type PGliteWithLive } from '@electric-sql/pglite/live'
+import { App } from 'obsidian'
-import { PGLITE_DB_PATH } from '../constants'
+// import { PGLITE_DB_PATH } from '../constants'
+import { createAndInitDb } from '../pgworker'
import { ConversationManager } from './modules/conversation/conversation-manager'
import { TemplateManager } from './modules/template/template-manager'
import { VectorManager } from './modules/vector/vector-manager'
-import { pgliteResources } from './pglite-resources'
-import { migrations } from './sql'
+// import { pgliteResources } from './pglite-resources'
+// import { migrations } from './sql'
export class DBManager {
- private app: App
- private dbPath: string
+ // private app: App
+ // private dbPath: string
private db: PGliteWithLive | null = null
// private db: PgliteDatabase | null = null
private vectorManager: VectorManager
private templateManager: TemplateManager
private conversationManager: ConversationManager
- constructor(app: App, dbPath: string) {
+ constructor(app: App) {
this.app = app
- this.dbPath = dbPath
+ // this.dbPath = dbPath
}
static async create(app: App): Promise<DBManager> {
- const dbManager = new DBManager(app, normalizePath(PGLITE_DB_PATH))
- await dbManager.loadExistingDatabase()
- if (!dbManager.db) {
- await dbManager.createNewDatabase()
- }
- await dbManager.migrateDatabase()
- await dbManager.save()
+ const dbManager = new DBManager(app)
+ dbManager.db = await createAndInitDb()
dbManager.vectorManager = new VectorManager(app, dbManager)
dbManager.templateManager = new TemplateManager(app, dbManager)
@@ -57,81 +52,70 @@ export class DBManager {
return this.conversationManager
}
- private async createNewDatabase() {
- const { fsBundle, wasmModule, vectorExtensionBundlePath } =
- await this.loadPGliteResources()
- this.db = await PGlite.create({
- fsBundle: fsBundle,
- wasmModule: wasmModule,
- extensions: {
- vector: vectorExtensionBundlePath,
- live,
- },
- })
- }
+ // private async createNewDatabase() {
+ // const { fsBundle, wasmModule, vectorExtensionBundlePath } =
+ // await this.loadPGliteResources()
+ // this.db = await PGlite.create({
+ // fsBundle: fsBundle,
+ // wasmModule: wasmModule,
+ // extensions: {
+ // vector: vectorExtensionBundlePath,
+ // live,
+ // },
+ // })
+ // }
- private async loadExistingDatabase() {
- try {
- const databaseFileExists = await this.app.vault.adapter.exists(
- this.dbPath,
- )
- if (!databaseFileExists) {
- return null
- }
- const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
- const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
- const { fsBundle, wasmModule, vectorExtensionBundlePath } =
- await this.loadPGliteResources()
- this.db = await PGlite.create({
- loadDataDir: fileBlob,
- fsBundle: fsBundle,
- wasmModule: wasmModule,
- extensions: {
- vector: vectorExtensionBundlePath,
- live
- },
- })
- // return drizzle(this.pgClient)
- } catch (error) {
- console.error('Error loading database:', error)
- console.log(this.dbPath)
- return null
- }
- }
+ // private async loadExistingDatabase() {
+ // try {
+ // const databaseFileExists = await this.app.vault.adapter.exists(
+ // this.dbPath,
+ // )
+ // if (!databaseFileExists) {
+ // return null
+ // }
+ // const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
+ // const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
+ // const { fsBundle, wasmModule, vectorExtensionBundlePath } =
+ // await this.loadPGliteResources()
+ // this.db = await PGlite.create({
+ // loadDataDir: fileBlob,
+ // fsBundle: fsBundle,
+ // wasmModule: wasmModule,
+ // extensions: {
+ // vector: vectorExtensionBundlePath,
+ // live
+ // },
+ // })
+ // // return drizzle(this.pgClient)
+ // } catch (error) {
+ // console.error('Error loading database:', error)
+ // console.log(this.dbPath)
+ // return null
+ // }
+ // }
- private async migrateDatabase(): Promise<void> {
- if (!this.db) {
- throw new Error('Database client not initialized');
- }
+ // private async migrateDatabase(): Promise<void> {
+ // if (!this.db) {
+ // throw new Error('Database client not initialized');
+ // }
- try {
- // Execute SQL migrations
- for (const [_key, migration] of Object.entries(migrations)) {
- // Split SQL into individual commands and execute them one by one
- const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
- for (const command of commands) {
- await this.db.query(command);
- }
- }
- } catch (error) {
- console.error('Error executing SQL migrations:', error);
- throw error;
- }
- }
+ // try {
+ // // Execute SQL migrations
+ // for (const [_key, migration] of Object.entries(migrations)) {
+ // // Split SQL into individual commands and execute them one by one
+ // const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
+ // for (const command of commands) {
+ // await this.db.query(command);
+ // }
+ // }
+ // } catch (error) {
+ // console.error('Error executing SQL migrations:', error);
+ // throw error;
+ // }
+ // }
async save(): Promise<void> {
- if (!this.db) {
- return
- }
- try {
- const blob: Blob = await this.db.dumpDataDir('gzip')
- await this.app.vault.adapter.writeBinary(
- this.dbPath,
- Buffer.from(await blob.arrayBuffer()),
- )
- } catch (error) {
- console.error('Error saving database:', error)
- }
+ console.log("need remove")
}
async cleanup() {
@@ -139,37 +123,37 @@ export class DBManager {
this.db = null
}
- private async loadPGliteResources(): Promise<{
- fsBundle: Blob
- wasmModule: WebAssembly.Module
- vectorExtensionBundlePath: URL
- }> {
- try {
- // Convert base64 to binary data
- const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
- const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
- const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
+ // private async loadPGliteResources(): Promise<{
+ // fsBundle: Blob
+ // wasmModule: WebAssembly.Module
+ // vectorExtensionBundlePath: URL
+ // }> {
+ // try {
+ // // Convert base64 to binary data
+ // const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
+ // const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
+ // const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
- // Create blobs from binary data
- const fsBundle = new Blob([dataBinary], {
- type: 'application/octet-stream',
- })
- const wasmModule = await WebAssembly.compile(wasmBinary)
+ // // Create blobs from binary data
+ // const fsBundle = new Blob([dataBinary], {
+ // type: 'application/octet-stream',
+ // })
+ // const wasmModule = await WebAssembly.compile(wasmBinary)
- // Create a blob URL for the vector extension
- const vectorBlob = new Blob([vectorBinary], {
- type: 'application/gzip',
- })
- const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
+ // // Create a blob URL for the vector extension
+ // const vectorBlob = new Blob([vectorBinary], {
+ // type: 'application/gzip',
+ // })
+ // const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
- return {
- fsBundle,
- wasmModule,
- vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
- }
- } catch (error) {
- console.error('Error loading PGlite resources:', error)
- throw error
- }
- }
+ // return {
+ // fsBundle,
+ // wasmModule,
+ // vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
+ // }
+ // } catch (error) {
+ // console.error('Error loading PGlite resources:', error)
+ // throw error
+ // }
+ // }
}
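With persistence delegated to the worker, the old load → migrate → save lifecycle collapses into a single call. A usage sketch built only from methods visible in this diff:

```ts
// The worker creates the database (idb://infio-db) and runs migrations
// during init, so the main thread just requests a ready client.
const dbManager = await DBManager.create(app)
const vectorManager = dbManager.getVectorManager()
// Durability now comes from PGlite's IndexedDB-backed filesystem inside
// the worker: save() is a stub and cleanup() only drops the reference.
```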
diff --git a/src/database/modules/conversation/conversation-manager.ts b/src/database/modules/conversation/conversation-manager.ts
index cdb8930..2009086 100644
--- a/src/database/modules/conversation/conversation-manager.ts
+++ b/src/database/modules/conversation/conversation-manager.ts
@@ -30,7 +30,6 @@ export class ConversationManager {
updatedAt: new Date(),
}
await this.repository.create(conversation)
- await this.dbManager.save()
}
async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
@@ -59,7 +58,6 @@ export class ConversationManager {
// Update conversation timestamp
await this.repository.update(id, { updatedAt: new Date() })
- await this.dbManager.save()
}
async findConversation(id: string): Promise {
@@ -74,7 +72,6 @@ export class ConversationManager {
async deleteConversation(id: string): Promise<void> {
await this.repository.delete(id)
- await this.dbManager.save()
}
getAllConversations(callback: (conversations: ChatConversationMeta[]) => void): void {
@@ -92,7 +89,6 @@ export class ConversationManager {
async updateConversationTitle(id: string, title: string): Promise<void> {
await this.repository.update(id, { title })
- await this.dbManager.save()
}
// convert ChatMessage to InsertMessage
diff --git a/src/database/modules/template/template-manager.ts b/src/database/modules/template/template-manager.ts
index 1d78ee5..a4be3b7 100644
--- a/src/database/modules/template/template-manager.ts
+++ b/src/database/modules/template/template-manager.ts
@@ -24,7 +24,6 @@ export class TemplateManager {
throw new DuplicateTemplateException(template.name)
}
const created = await this.repository.create(template)
- await this.dbManager.save()
return created
}
@@ -45,7 +44,6 @@ export class TemplateManager {
async deleteTemplate(id: string): Promise {
const deleted = await this.repository.delete(id)
- await this.dbManager.save()
return deleted
}
}
diff --git a/src/database/modules/vector/vector-manager.ts b/src/database/modules/vector/vector-manager.ts
index 17f12c6..61ab56e 100644
--- a/src/database/modules/vector/vector-manager.ts
+++ b/src/database/modules/vector/vector-manager.ts
@@ -6,10 +6,10 @@ import pLimit from 'p-limit'
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
import {
- LLMAPIKeyInvalidException,
- LLMAPIKeyNotSetException,
- LLMBaseUrlNotSetException,
- LLMRateLimitExceededException,
+ LLMAPIKeyInvalidException,
+ LLMAPIKeyNotSetException,
+ LLMBaseUrlNotSetException,
+ LLMRateLimitExceededException,
} from '../../../core/llm/exception'
import { InsertVector, SelectVector } from '../../../database/schema'
import { EmbeddingModel } from '../../../types/embedding'
@@ -19,260 +19,353 @@ import { DBManager } from '../../database-manager'
import { VectorRepository } from './vector-repository'
export class VectorManager {
- private app: App
- private repository: VectorRepository
- private dbManager: DBManager
+ private app: App
+ private repository: VectorRepository
+ private dbManager: DBManager
- constructor(app: App, dbManager: DBManager) {
- this.app = app
- this.dbManager = dbManager
- this.repository = new VectorRepository(app, dbManager.getPgClient())
- }
+ constructor(app: App, dbManager: DBManager) {
+ this.app = app
+ this.dbManager = dbManager
+ this.repository = new VectorRepository(app, dbManager.getPgClient())
+ }
- async performSimilaritySearch(
- queryVector: number[],
- embeddingModel: EmbeddingModel,
- options: {
- minSimilarity: number
- limit: number
- scope?: {
- files: string[]
- folders: string[]
- }
- },
- ): Promise<
- (Omit<SelectVector, 'embedding'> & {
- similarity: number
- })[]
- > {
- return await this.repository.performSimilaritySearch(
- queryVector,
- embeddingModel,
- options,
- )
- }
+ async performSimilaritySearch(
+ queryVector: number[],
+ embeddingModel: EmbeddingModel,
+ options: {
+ minSimilarity: number
+ limit: number
+ scope?: {
+ files: string[]
+ folders: string[]
+ }
+ },
+ ): Promise<
+ (Omit<SelectVector, 'embedding'> & {
+ similarity: number
+ })[]
+ > {
+ return await this.repository.performSimilaritySearch(
+ queryVector,
+ embeddingModel,
+ options,
+ )
+ }
- async updateVaultIndex(
- embeddingModel: EmbeddingModel,
- options: {
- chunkSize: number
- excludePatterns: string[]
- includePatterns: string[]
- reindexAll?: boolean
- },
- updateProgress?: (indexProgress: IndexProgress) => void,
- ): Promise<void> {
- let filesToIndex: TFile[]
- if (options.reindexAll) {
- filesToIndex = await this.getFilesToIndex({
- embeddingModel: embeddingModel,
- excludePatterns: options.excludePatterns,
- includePatterns: options.includePatterns,
- reindexAll: true,
- })
- await this.repository.clearAllVectors(embeddingModel)
- } else {
- await this.deleteVectorsForDeletedFiles(embeddingModel)
- filesToIndex = await this.getFilesToIndex({
- embeddingModel: embeddingModel,
- excludePatterns: options.excludePatterns,
- includePatterns: options.includePatterns,
- })
- await this.repository.deleteVectorsForMultipleFiles(
- filesToIndex.map((file) => file.path),
- embeddingModel,
- )
- }
+ async updateVaultIndex(
+ embeddingModel: EmbeddingModel,
+ options: {
+ chunkSize: number
+ excludePatterns: string[]
+ includePatterns: string[]
+ reindexAll?: boolean
+ },
+ updateProgress?: (indexProgress: IndexProgress) => void,
+ ): Promise<void> {
+ let filesToIndex: TFile[]
+ if (options.reindexAll) {
+ filesToIndex = await this.getFilesToIndex({
+ embeddingModel: embeddingModel,
+ excludePatterns: options.excludePatterns,
+ includePatterns: options.includePatterns,
+ reindexAll: true,
+ })
+ await this.repository.clearAllVectors(embeddingModel)
+ } else {
+ await this.cleanVectorsForDeletedFiles(embeddingModel)
+ filesToIndex = await this.getFilesToIndex({
+ embeddingModel: embeddingModel,
+ excludePatterns: options.excludePatterns,
+ includePatterns: options.includePatterns,
+ })
+ await this.repository.deleteVectorsForMultipleFiles(
+ filesToIndex.map((file) => file.path),
+ embeddingModel,
+ )
+ }
- if (filesToIndex.length === 0) {
- return
- }
+ if (filesToIndex.length === 0) {
+ return
+ }
- const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
- 'markdown',
- {
- chunkSize: options.chunkSize,
- // TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
- // Current token counting method is too slow for practical use
- // lengthFunction: async (text) => {
- // return await tokenCount(text)
- // },
- },
- )
+ const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
+ 'markdown',
+ {
+ chunkSize: options.chunkSize,
+ // TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
+ // Current token counting method is too slow for practical use
+ // lengthFunction: async (text) => {
+ // return await tokenCount(text)
+ // },
+ },
+ )
- const contentChunks: InsertVector[] = (
- await Promise.all(
- filesToIndex.map(async (file) => {
- const fileContent = await this.app.vault.cachedRead(file)
- const fileDocuments = await textSplitter.createDocuments([
- fileContent,
- ])
- return fileDocuments.map((chunk): InsertVector => {
- return {
- path: file.path,
- mtime: file.stat.mtime,
+ const contentChunks: InsertVector[] = (
+ await Promise.all(
+ filesToIndex.map(async (file) => {
+ const fileContent = await this.app.vault.cachedRead(file)
+ const fileDocuments = await textSplitter.createDocuments([
+ fileContent,
+ ])
+ return fileDocuments.map((chunk): InsertVector => {
+ return {
+ path: file.path,
+ mtime: file.stat.mtime,
content: chunk.pageContent,
embedding: [],
- metadata: {
- startLine: chunk.metadata.loc.lines.from as number,
- endLine: chunk.metadata.loc.lines.to as number,
- },
- }
- })
- }),
- )
- ).flat()
+ metadata: {
+ startLine: Number(chunk.metadata.loc.lines.from),
+ endLine: Number(chunk.metadata.loc.lines.to),
+ },
+ }
+ })
+ }),
+ )
+ ).flat()
- updateProgress?.({
- completedChunks: 0,
- totalChunks: contentChunks.length,
- totalFiles: filesToIndex.length,
- })
+ updateProgress?.({
+ completedChunks: 0,
+ totalChunks: contentChunks.length,
+ totalFiles: filesToIndex.length,
+ })
- const embeddingProgress = { completed: 0, inserted: 0 }
- const embeddingChunks: InsertVector[] = []
- const batchSize = 100
- const limit = pLimit(50)
- const abortController = new AbortController()
- const tasks = contentChunks.map((chunk) =>
- limit(async () => {
- if (abortController.signal.aborted) {
- throw new Error('Operation was aborted')
- }
- try {
- await backOff(
- async () => {
- const embedding = await embeddingModel.getEmbedding(chunk.content)
- const embeddedChunk = {
- path: chunk.path,
- mtime: chunk.mtime,
- content: chunk.content,
- embedding,
- metadata: chunk.metadata,
- }
- embeddingChunks.push(embeddedChunk)
- embeddingProgress.completed++
- updateProgress?.({
- completedChunks: embeddingProgress.completed,
- totalChunks: contentChunks.length,
- totalFiles: filesToIndex.length,
- })
+ const embeddingProgress = { completed: 0 }
+ const embeddingChunks: InsertVector[] = []
+ const batchSize = 100
+ const limit = pLimit(50)
+ const abortController = new AbortController()
+ const tasks = contentChunks.map((chunk) =>
+ limit(async () => {
+ if (abortController.signal.aborted) {
+ throw new Error('Operation was aborted')
+ }
+ try {
+ await backOff(
+ async () => {
+ const embedding = await embeddingModel.getEmbedding(chunk.content)
+ const embeddedChunk = {
+ path: chunk.path,
+ mtime: chunk.mtime,
+ content: chunk.content,
+ embedding,
+ metadata: chunk.metadata,
+ }
+ embeddingChunks.push(embeddedChunk)
+ embeddingProgress.completed++
+ updateProgress?.({
+ completedChunks: embeddingProgress.completed,
+ totalChunks: contentChunks.length,
+ totalFiles: filesToIndex.length,
+ })
+ },
+ {
+ numOfAttempts: 5,
+ startingDelay: 1000,
+ timeMultiple: 1.5,
+ jitter: 'full',
+ },
+ )
+ } catch (error) {
+ abortController.abort()
+ throw error
+ }
+ }),
+ )
- // Insert vectors in batches
- if (
- embeddingChunks.length >=
- embeddingProgress.inserted + batchSize ||
- embeddingChunks.length === contentChunks.length
- ) {
- await this.repository.insertVectors(
- embeddingChunks.slice(
- embeddingProgress.inserted,
- embeddingProgress.inserted + batchSize,
- ),
- embeddingModel,
- )
- embeddingProgress.inserted += batchSize
- }
- },
- {
- numOfAttempts: 5,
- startingDelay: 1000,
- timeMultiple: 1.5,
- jitter: 'full',
- },
- )
- } catch (error) {
- abortController.abort()
- throw error
- }
- }),
- )
+ try {
+ await Promise.all(tasks)
- try {
- await Promise.all(tasks)
- } catch (error) {
- if (
- error instanceof LLMAPIKeyNotSetException ||
- error instanceof LLMAPIKeyInvalidException ||
- error instanceof LLMBaseUrlNotSetException
- ) {
- openSettingsModalWithError(this.app, (error as Error).message)
- } else if (error instanceof LLMRateLimitExceededException) {
- new Notice(error.message)
- } else {
- console.error('Error embedding chunks:', error)
- throw error
- }
- } finally {
- await this.dbManager.save()
- }
- }
+ // All embeddings generated; insert them in batches
+ if (embeddingChunks.length > 0) {
+ // batch insert all vectors
+ let inserted = 0
+ while (inserted < embeddingChunks.length) {
+ const chunksToInsert = embeddingChunks.slice(
+ inserted,
+ Math.min(inserted + batchSize, embeddingChunks.length)
+ )
+ await this.repository.insertVectors(chunksToInsert, embeddingModel)
+ inserted += chunksToInsert.length
+ }
+ }
+ } catch (error) {
+ if (
+ error instanceof LLMAPIKeyNotSetException ||
+ error instanceof LLMAPIKeyInvalidException ||
+ error instanceof LLMBaseUrlNotSetException
+ ) {
+ openSettingsModalWithError(this.app, error.message)
+ } else if (error instanceof LLMRateLimitExceededException) {
+ new Notice(error.message)
+ } else {
+ console.error('Error embedding chunks:', error)
+ throw error
+ }
+ }
+ }
- private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
- const indexedFilePaths =
- await this.repository.getIndexedFilePaths(embeddingModel)
- for (const filePath of indexedFilePaths) {
- if (!this.app.vault.getAbstractFileByPath(filePath)) {
- await this.repository.deleteVectorsForMultipleFiles(
- [filePath],
- embeddingModel,
- )
- }
- }
- }
+ async UpdateFileVectorIndex(
+ embeddingModel: EmbeddingModel,
+ chunkSize: number,
+ file: TFile
+ ) {
- private async getFilesToIndex({
- embeddingModel,
- excludePatterns,
- includePatterns,
- reindexAll,
- }: {
- embeddingModel: EmbeddingModel
- excludePatterns: string[]
- includePatterns: string[]
- reindexAll?: boolean
- }): Promise<TFile[]> {
- let filesToIndex = this.app.vault.getMarkdownFiles()
+ // Delete existing vectors for the file
+ await this.repository.deleteVectorsForSingleFile(
+ file.path,
+ embeddingModel,
+ )
- filesToIndex = filesToIndex.filter((file) => {
- return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
- })
+ // Embed the file
+ const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
+ 'markdown',
+ {
+ chunkSize,
+ },
+ )
+ const fileContent = await this.app.vault.cachedRead(file)
+ const fileDocuments = await textSplitter.createDocuments([
+ fileContent,
+ ])
- if (includePatterns.length > 0) {
- filesToIndex = filesToIndex.filter((file) => {
- return includePatterns.some((pattern) => minimatch(file.path, pattern))
- })
- }
+ const contentChunks: InsertVector[] = fileDocuments.map((chunk): InsertVector => {
+ return {
+ path: file.path,
+ mtime: file.stat.mtime,
+ content: chunk.pageContent,
+ embedding: [],
+ metadata: {
+ startLine: Number(chunk.metadata.loc.lines.from),
+ endLine: Number(chunk.metadata.loc.lines.to),
+ },
+ }
+ })
- if (reindexAll) {
- return filesToIndex
- }
+ const embeddingChunks: InsertVector[] = []
+ const limit = pLimit(50)
+ const abortController = new AbortController()
+ const tasks = contentChunks.map((chunk) =>
+ limit(async () => {
+ if (abortController.signal.aborted) {
+ throw new Error('Operation was aborted')
+ }
+ try {
+ await backOff(
+ async () => {
+ const embedding = await embeddingModel.getEmbedding(chunk.content)
+ const embeddedChunk = {
+ path: chunk.path,
+ mtime: chunk.mtime,
+ content: chunk.content,
+ embedding,
+ metadata: chunk.metadata,
+ }
+ embeddingChunks.push(embeddedChunk)
+ },
+ {
+ numOfAttempts: 5,
+ startingDelay: 1000,
+ timeMultiple: 1.5,
+ jitter: 'full',
+ },
+ )
+ } catch (error) {
+ abortController.abort()
+ throw error
+ }
+ }),
+ )
- // Check for updated or new files
- filesToIndex = await Promise.all(
- filesToIndex.map(async (file) => {
- const fileChunks = await this.repository.getVectorsByFilePath(
- file.path,
- embeddingModel,
- )
- if (fileChunks.length === 0) {
- // File is not indexed, so we need to index it
- const fileContent = await this.app.vault.cachedRead(file)
- if (fileContent.length === 0) {
- // Ignore empty files
- return null
- }
- return file
- }
- const outOfDate = file.stat.mtime > fileChunks[0].mtime
- if (outOfDate) {
- // File has changed, so we need to re-index it
- return file
- }
- return null
- }),
- ).then((files) => files.filter(Boolean))
+ try {
+ await Promise.all(tasks)
- return filesToIndex
- }
+ // All embeddings generated; insert them in batches
+ if (embeddingChunks.length > 0) {
+ const batchSize = 100
+ let inserted = 0
+ while (inserted < embeddingChunks.length) {
+ const chunksToInsert = embeddingChunks.slice(inserted, Math.min(inserted + batchSize, embeddingChunks.length))
+ await this.repository.insertVectors(chunksToInsert, embeddingModel)
+ inserted += chunksToInsert.length
+ }
+ }
+ } catch (error) {
+ console.error('Error embedding chunks:', error)
+ }
+ }
+
+ async DeleteFileVectorIndex(
+ embeddingModel: EmbeddingModel,
+ file: TFile
+ ) {
+ await this.repository.deleteVectorsForSingleFile(file.path, embeddingModel)
+ }
+
+ private async cleanVectorsForDeletedFiles(
+ embeddingModel: EmbeddingModel,
+ ) {
+ const indexedFilePaths = await this.repository.getAllIndexedFilePaths(embeddingModel)
+ const needToDelete = indexedFilePaths.filter(filePath => !this.app.vault.getAbstractFileByPath(filePath))
+ if (needToDelete.length > 0) {
+ await this.repository.deleteVectorsForMultipleFiles(
+ needToDelete,
+ embeddingModel,
+ )
+ }
+ }
+
+ private async getFilesToIndex({
+ embeddingModel,
+ excludePatterns,
+ includePatterns,
+ reindexAll,
+ }: {
+ embeddingModel: EmbeddingModel
+ excludePatterns: string[]
+ includePatterns: string[]
+ reindexAll?: boolean
+ }): Promise<TFile[]> {
+ let filesToIndex = this.app.vault.getMarkdownFiles()
+
+ filesToIndex = filesToIndex.filter((file) => {
+ return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
+ })
+
+ if (includePatterns.length > 0) {
+ filesToIndex = filesToIndex.filter((file) => {
+ return includePatterns.some((pattern) => minimatch(file.path, pattern))
+ })
+ }
+
+ if (reindexAll) {
+ return filesToIndex
+ }
+
+ // Check for updated or new files
+ filesToIndex = await Promise.all(
+ filesToIndex.map(async (file) => {
+ const fileChunks = await this.repository.getVectorsByFilePath(
+ file.path,
+ embeddingModel,
+ )
+ if (fileChunks.length === 0) {
+ // File is not indexed, so we need to index it
+ const fileContent = await this.app.vault.cachedRead(file)
+ if (fileContent.length === 0) {
+ // Ignore empty files
+ return null
+ }
+ return file
+ }
+ const outOfDate = file.stat.mtime > fileChunks[0].mtime
+ if (outOfDate) {
+ // File has changed, so we need to re-index it
+ return file
+ }
+ return null
+ }),
+ ).then((files) => files.filter(Boolean))
+
+ return filesToIndex
+ }
}
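Vectors are now inserted only after all embeddings have been generated, using the same fixed-size batching loop in both `updateVaultIndex` and `UpdateFileVectorIndex`. A generic sketch of that pattern; the helper itself is hypothetical, not part of the diff:

```ts
// Hypothetical helper: insert rows in fixed-size batches (the diff uses 100).
async function insertInBatches<T>(
	rows: T[],
	batchSize: number,
	insert: (batch: T[]) => Promise<void>,
): Promise<void> {
	for (let offset = 0; offset < rows.length; offset += batchSize) {
		await insert(rows.slice(offset, offset + batchSize))
	}
}

// e.g. await insertInBatches(embeddingChunks, 100, (batch) =>
//   this.repository.insertVectors(batch, embeddingModel))
```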
diff --git a/src/database/modules/vector/vector-repository.ts b/src/database/modules/vector/vector-repository.ts
index 6a93679..301562f 100644
--- a/src/database/modules/vector/vector-repository.ts
+++ b/src/database/modules/vector/vector-repository.ts
@@ -22,7 +22,7 @@ export class VectorRepository {
return tableDefinition.name
}
- async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
+ async getAllIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
@@ -80,7 +80,7 @@ export class VectorRepository {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
- const tableName = this.getTableName(embeddingModel)
+ const tableName = this.getTableName(embeddingModel)
await this.db.query(`DELETE FROM "${tableName}"`)
}
@@ -160,7 +160,11 @@ export class VectorRepository {
if (conditions.length > 0) {
scopeCondition = `AND (${conditions.join(' OR ')})`
}
- }
+ }
+
+ const queryVectorLength = `SELECT count(1) FROM "${tableName}"`;
+ const queryVectorLengthResult = await this.db.query(queryVectorLength)
+ console.log('queryVectorLengthResult, ', queryVectorLengthResult)
const query = `
SELECT
diff --git a/src/main.ts b/src/main.ts
index 7512209..d9abcbe 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,5 +1,6 @@
// @ts-nocheck
import { EditorView } from '@codemirror/view'
+// import { PGlite } from '@electric-sql/pglite'
import { Editor, MarkdownView, Notice, Plugin, TFile } from 'obsidian'
import { ApplyView } from './ApplyView'
@@ -25,8 +26,8 @@ import {
InfioSettings,
parseInfioSettings,
} from './types/settings'
-import './utils/path'
import { getMentionableBlockData } from './utils/obsidian'
+import './utils/path'
// Remember to rename these classes and interfaces!
export default class InfioPlugin extends Plugin {
@@ -41,7 +42,7 @@ export default class InfioPlugin extends Plugin {
inlineEdit: InlineEdit | null = null
private dbManagerInitPromise: Promise<DBManager> | null = null
private ragEngineInitPromise: Promise<RAGEngine> | null = null
-
+ // private pg: PGlite | null = null
async onload() {
await this.loadSettings()
@@ -49,6 +50,9 @@ export default class InfioPlugin extends Plugin {
this.settingTab = new InfioSettingTab(this.app, this)
this.addSettingTab(this.settingTab)
+ // create and init pglite db
+ // this.pg = await createAndInitDb()
+
// This creates an icon in the left ribbon.
this.addRibbonIcon('wand-sparkles', 'Open infio copilot', () =>
this.openChatView(),
@@ -120,6 +124,17 @@ export default class InfioPlugin extends Plugin {
this.app.metadataCache.on("changed", (file: TFile) => {
if (file) {
eventListener.handleFileChange(file);
+ console.log("file changed: filename: ", file.name);
+ this.ragEngine?.updateFileIndex(file);
+ }
+ })
+ );
+
+ this.registerEvent(
+ this.app.metadataCache.on("deleted", (file: TFile) => {
+ if (file) {
+ console.log("file deleted: filename: ", file.name)
+ this.ragEngine?.deleteFileIndex(file);
}
})
);
@@ -322,7 +337,7 @@ export default class InfioPlugin extends Plugin {
}
onunload() {
- this.dbManager?.cleanup()
+ // this.dbManager?.cleanup()
this.dbManager = null
}
diff --git a/src/pgworker/index.ts b/src/pgworker/index.ts
new file mode 100644
index 0000000..901bb28
--- /dev/null
+++ b/src/pgworker/index.ts
@@ -0,0 +1,19 @@
+import { live } from '@electric-sql/pglite/live';
+import { PGliteWorker } from '@electric-sql/pglite/worker';
+
+import PGWorker from './pglite.worker';
+
+export const createAndInitDb = async () => {
+ const worker = new PGWorker();
+
+ const pg = await PGliteWorker.create(
+ worker,
+ {
+ extensions: {
+ live,
+ },
+ },
+ )
+ console.log('PGlite DB created')
+ return pg
+}
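A quick usage sketch of the handle this module returns; `PGliteWorker` proxies the regular PGlite query API across the worker boundary:

```ts
// Illustrative round-trip through the worker-backed client.
const pg = await createAndInitDb()
const result = await pg.query('SELECT 1 AS ok')
console.log(result.rows) // [{ ok: 1 }]
```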
diff --git a/src/pgworker/pglite.worker.ts b/src/pgworker/pglite.worker.ts
new file mode 100644
index 0000000..0c8c0fe
--- /dev/null
+++ b/src/pgworker/pglite.worker.ts
@@ -0,0 +1,78 @@
+// @ts-nocheck
+import { PGlite } from '@electric-sql/pglite'
+
+import { PGliteWorkerOptions, worker } from '@electric-sql/pglite/worker'
+
+import { pgliteResources } from '../database/pglite-resources'
+import { migrations } from '../database/sql'
+
+export { }
+
+const loadPGliteResources = async (): Promise<{
+ fsBundle: Blob
+ wasmModule: WebAssembly.Module
+ vectorExtensionBundlePath: URL
+}> => {
+ try {
+ // Convert base64 to binary data
+ const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
+ const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
+ const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
+
+ // Create blobs from binary data
+ const fsBundle = new Blob([dataBinary], {
+ type: 'application/octet-stream',
+ })
+ const wasmModule = await WebAssembly.compile(wasmBinary)
+
+ // Create a blob URL for the vector extension
+ const vectorBlob = new Blob([vectorBinary], {
+ type: 'application/gzip',
+ })
+ const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
+
+ return {
+ fsBundle,
+ wasmModule,
+ vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
+ }
+ } catch (error) {
+ console.error('Error loading PGlite resources:', error)
+ throw error
+ }
+}
+
+worker({
+ async init(options: PGliteWorkerOptions) {
+ let db: PGlite;
+ try {
+ const { fsBundle, wasmModule, vectorExtensionBundlePath } =
+ await loadPGliteResources()
+
+ db = await PGlite.create('idb://infio-db', {
+ relaxedDurability: true,
+ fsBundle: fsBundle,
+ wasmModule: wasmModule,
+ ...options,
+ extensions: {
+ ...options.extensions,
+ vector: vectorExtensionBundlePath,
+ },
+ })
+ } catch (error) {
+ console.error('Error creating PGlite instance:', error)
+ throw error
+ }
+
+ // Execute SQL migrations
+ for (const [_key, migration] of Object.entries(migrations)) {
+ // Split SQL into individual commands and execute them one by one
+ const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
+ for (const command of commands) {
+ await db.exec(command);
+ }
+ }
+
+ return db
+ },
+})
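Note that the migration runner splits each migration on blank lines (`'\n\n'`) and executes the pieces one at a time, so statements in `src/database/sql` must be separated by an empty line. A toy entry showing the expected shape (table and key names hypothetical):

```ts
// Hypothetical migration compatible with the blank-line splitting above.
export const migrations = {
	'0001_example': {
		sql: `CREATE TABLE IF NOT EXISTS "example" (id TEXT PRIMARY KEY);

CREATE INDEX IF NOT EXISTS "example_id_idx" ON "example" (id);`,
	},
}
```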
diff --git a/src/pgworker/worker.d.ts b/src/pgworker/worker.d.ts
new file mode 100644
index 0000000..42d7eb9
--- /dev/null
+++ b/src/pgworker/worker.d.ts
@@ -0,0 +1,4 @@
+declare module 'pglite.worker' {
+ const WorkerFactory: new () => Worker;
+ export default WorkerFactory;
+}
\ No newline at end of file
diff --git a/src/settings/components/ProviderModelsPicker.tsx b/src/settings/components/ProviderModelsPicker.tsx
index 9f394ed..3303520 100644
--- a/src/settings/components/ProviderModelsPicker.tsx
+++ b/src/settings/components/ProviderModelsPicker.tsx
@@ -234,7 +234,7 @@ export const ComboBoxComponent: React.FC = ({
- [{modelProvider}]{modelId}
+ [{modelProvider}] {modelId}
\n${currentFileContent}\n`
: undefined
diff --git a/src/utils/web-search.ts b/src/utils/web-search.ts
index 0e3a9fa..6928831 100644
--- a/src/utils/web-search.ts
+++ b/src/utils/web-search.ts
@@ -20,8 +20,6 @@ export async function webSearch(query: string, serperApiKey: string): Promise {
const url = `${SERPER_BASE_URL}?q=${encodeURIComponent(query)}&engine=google&api_key=${serperApiKey}&num=20`;
- console.log(url)
-
https.get(url, (res: any) => {
let data = '';
@@ -31,7 +29,6 @@ export async function webSearch(query: string, serperApiKey: string): Promise {
try {
- console.log(data)
let parsedData: SearchResponse;
try {
parsedData = JSON.parse(data);
@@ -90,8 +87,6 @@ export async function fetchUrlsContent(urls: string[], apiKey: string): Promise<
}
});
- console.log('fetchUrlsContent', results);
-
Promise.all(results).then((texts) => {
resolve(texts.join('\n\n'));
}).catch((error) => {
@@ -123,8 +118,6 @@ function fetchJina(url: string, apiKey: string): Promise {
});
res.on('end', () => {
- console.log(data);
-
try {
// check if there is an error response
const response = JSON.parse(data);