From f6728f1b82c48675a49bb57ac81d39eb69539f91 Mon Sep 17 00:00:00 2001 From: duanfuxiang Date: Fri, 13 Jun 2025 11:11:04 +0800 Subject: [PATCH] fix: pdf null byte error --- src/components/chat-view/ChatView.tsx | 20 ++++++------ .../file-search/match/coreplugin-match.ts | 2 +- src/utils/obsidian.ts | 31 ++++++++++++++----- src/utils/prompt-generator.ts | 8 ++--- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index 52900d0..63829ed 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -23,16 +23,16 @@ import { useLLM } from '../../contexts/LLMContext' import { useMcpHub } from '../../contexts/McpHubContext' import { useRAG } from '../../contexts/RAGContext' import { useSettings } from '../../contexts/SettingsContext' +import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match' +import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match' +import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex' +import { regexSearchUsingRipgrep } from '../../core/file-search/regex/ripgrep-regex' import { LLMAPIKeyInvalidException, LLMAPIKeyNotSetException, LLMBaseUrlNotSetException, LLMModelNotSetException, } from '../../core/llm/exception' -import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match' -import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match' -import { regexSearchUsingRipgrep } from '../../core/file-search/regex/ripgrep-regex' -import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex' import { useChatHistory } from '../../hooks/use-chat-history' import { useCustomModes } from '../../hooks/use-custom-mode' import { t } from '../../lang/helpers' @@ -50,26 +50,26 @@ import { getMentionableKey, serializeMentionable, } from '../../utils/mentionable' -import { readTFileContent } from '../../utils/obsidian' +import { readTFileContent, readTFileContentPdf } from '../../utils/obsidian' import { openSettingsModalWithError } from '../../utils/open-settings-modal' import { PromptGenerator, addLineNumbers } from '../../utils/prompt-generator' // Removed empty line above, added one below for group separation import { fetchUrlsContent, onEnt, webSearch } from '../../utils/web-search' -import { ModeSelect } from './chat-input/ModeSelect' // Start of new group +import { ModeSelect } from './chat-input/ModeSelect'; // Start of new group import PromptInputWithActions, { ChatUserInputRef } from './chat-input/PromptInputWithActions' import { editorStateToPlainText } from './chat-input/utils/editor-state-to-plain-text' import { ChatHistory } from './ChatHistoryView' import CommandsView from './CommandsView' import CustomModeView from './CustomModeView' +import FileReadResults from './FileReadResults' import HelloInfo from './HelloInfo' -import McpHubView from './McpHubView' // Moved after MarkdownReasoningBlock +import MarkdownReasoningBlock from './Markdown/MarkdownReasoningBlock' +import McpHubView from './McpHubView'; // Moved after MarkdownReasoningBlock import QueryProgress, { QueryProgressState } from './QueryProgress' import ReactMarkdown from './ReactMarkdown' import SimilaritySearchResults from './SimilaritySearchResults' -import FileReadResults from './FileReadResults' import WebsiteReadResults from './WebsiteReadResults' -import MarkdownReasoningBlock from './Markdown/MarkdownReasoningBlock' // Add an empty line here const getNewInputMessage = (app: App, defaultMention: string): ChatUserMessage => { @@ -581,7 +581,7 @@ const Chat = forwardRef((props, ref) => { if (!opFile) { throw new Error(`File not found: ${toolArgs.filepath}`) } - const fileContent = await readTFileContent(opFile, app.vault, app) + const fileContent = await readTFileContentPdf(opFile, app.vault, app) const formattedContent = `[read_file for '${toolArgs.filepath}'] Result:\n${addLineNumbers(fileContent)}\n`; return { type: 'read_file', diff --git a/src/core/file-search/match/coreplugin-match.ts b/src/core/file-search/match/coreplugin-match.ts index 809ecde..e54172d 100644 --- a/src/core/file-search/match/coreplugin-match.ts +++ b/src/core/file-search/match/coreplugin-match.ts @@ -88,4 +88,4 @@ export async function matchSearchUsingCorePlugin( console.error("Error during core plugin processing:", error); return "An error occurred during the search."; } -} \ No newline at end of file +} diff --git a/src/utils/obsidian.ts b/src/utils/obsidian.ts index b4d09aa..ac4251f 100644 --- a/src/utils/obsidian.ts +++ b/src/utils/obsidian.ts @@ -8,7 +8,7 @@ export async function parsePdfContent(file: TFile, app: App): Promise { try { // 使用 Obsidian 内置的 PDF.js const pdfjsLib = await loadPdfJs() - + // Read PDF file as binary buffer const pdfBuffer = await app.vault.readBinary(file) @@ -26,7 +26,9 @@ export async function parsePdfContent(file: TFile, app: App): Promise { fullText += pageText + '\n\n' } - return fullText || '(Empty PDF content)' + // 清理null字节,防止PostgreSQL UTF8编码错误 + const cleanText = (fullText || '(Empty PDF content)').replace(/\0/g, '') + return cleanText } catch (error: any) { console.error('Error parsing PDF:', error) return `(Error reading PDF file: ${error?.message || 'Unknown error'})` @@ -36,27 +38,42 @@ export async function parsePdfContent(file: TFile, app: App): Promise { export async function readTFileContent( file: TFile, vault: Vault, +): Promise { + if (file.extension != 'md') { + return "(Binary file, unable to display content)" + } + const content = await vault.cachedRead(file) + // 清理null字节,防止PostgreSQL UTF8编码错误 + return content.replace(/\0/g, '') +} + +export async function readTFileContentPdf( + file: TFile, + vault: Vault, app?: App, ): Promise { if (file.extension === 'pdf') { if (app) { - return await parsePdfContent(file, app) + const content = await parsePdfContent(file, app) + // 清理null字节,防止PostgreSQL UTF8编码错误 + return content.replace(/\0/g, '') } return "(PDF file, app context required for processing)" } if (file.extension != 'md') { return "(Binary file, unable to display content)" } - return await vault.cachedRead(file) + const content = await vault.cachedRead(file) + // 清理null字节,防止PostgreSQL UTF8编码错误 + return content.replace(/\0/g, '') } export async function readMultipleTFiles( files: TFile[], - vault: Vault, - app?: App, + vault: Vault ): Promise { // Read files in parallel - const readPromises = files.map((file) => readTFileContent(file, vault, app)) + const readPromises = files.map((file) => readTFileContent(file, vault)) return await Promise.all(readPromises) } diff --git a/src/utils/prompt-generator.ts b/src/utils/prompt-generator.ts index ca3d9bb..fa9aab6 100644 --- a/src/utils/prompt-generator.ts +++ b/src/utils/prompt-generator.ts @@ -87,7 +87,7 @@ async function getFileOrFolderContent( if (path.extension != 'md') { return "(Binary file, unable to display content)" } - return addLineNumbers(await readTFileContent(path, vault, app)) + return addLineNumbers(await readTFileContent(path, vault)) } else if (path instanceof TFolder) { const entries = path.children let folderContent = "" @@ -111,7 +111,7 @@ async function getFileOrFolderContent( if (entry.extension != 'md') { return undefined } - const content = addLineNumbers(await readTFileContent(entry, vault, app)) + const content = addLineNumbers(await readTFileContent(entry, vault)) return `\n${content}\n` } catch (error) { return undefined @@ -883,7 +883,7 @@ ${customInstruction} private async getCurrentFileMessage( currentFile: TFile, ): Promise { - const fileContent = await readTFileContent(currentFile, this.app.vault, this.app) + const fileContent = await readTFileContent(currentFile, this.app.vault) return { role: 'user', content: `# Inputs @@ -905,7 +905,7 @@ ${fileContent} return null; } - const fileContent = await readTFileContent(currentFile, this.app.vault, this.app); + const fileContent = await readTFileContent(currentFile, this.app.vault); const lines = fileContent.split('\n'); // 计算上下文范围,并处理边界情况