fix: pdf null byte error

This commit is contained in:
duanfuxiang 2025-06-13 11:11:04 +08:00
parent 540226a792
commit f6728f1b82
4 changed files with 39 additions and 22 deletions

View File

@ -23,16 +23,16 @@ import { useLLM } from '../../contexts/LLMContext'
import { useMcpHub } from '../../contexts/McpHubContext' import { useMcpHub } from '../../contexts/McpHubContext'
import { useRAG } from '../../contexts/RAGContext' import { useRAG } from '../../contexts/RAGContext'
import { useSettings } from '../../contexts/SettingsContext' import { useSettings } from '../../contexts/SettingsContext'
import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match'
import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match'
import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex'
import { regexSearchUsingRipgrep } from '../../core/file-search/regex/ripgrep-regex'
import { import {
LLMAPIKeyInvalidException, LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException, LLMAPIKeyNotSetException,
LLMBaseUrlNotSetException, LLMBaseUrlNotSetException,
LLMModelNotSetException, LLMModelNotSetException,
} from '../../core/llm/exception' } from '../../core/llm/exception'
import { matchSearchUsingCorePlugin } from '../../core/file-search/match/coreplugin-match'
import { matchSearchUsingOmnisearch } from '../../core/file-search/match/omnisearch-match'
import { regexSearchUsingRipgrep } from '../../core/file-search/regex/ripgrep-regex'
import { regexSearchUsingCorePlugin } from '../../core/file-search/regex/coreplugin-regex'
import { useChatHistory } from '../../hooks/use-chat-history' import { useChatHistory } from '../../hooks/use-chat-history'
import { useCustomModes } from '../../hooks/use-custom-mode' import { useCustomModes } from '../../hooks/use-custom-mode'
import { t } from '../../lang/helpers' import { t } from '../../lang/helpers'
@ -50,26 +50,26 @@ import {
getMentionableKey, getMentionableKey,
serializeMentionable, serializeMentionable,
} from '../../utils/mentionable' } from '../../utils/mentionable'
import { readTFileContent } from '../../utils/obsidian' import { readTFileContent, readTFileContentPdf } from '../../utils/obsidian'
import { openSettingsModalWithError } from '../../utils/open-settings-modal' import { openSettingsModalWithError } from '../../utils/open-settings-modal'
import { PromptGenerator, addLineNumbers } from '../../utils/prompt-generator' import { PromptGenerator, addLineNumbers } from '../../utils/prompt-generator'
// Removed empty line above, added one below for group separation // Removed empty line above, added one below for group separation
import { fetchUrlsContent, onEnt, webSearch } from '../../utils/web-search' import { fetchUrlsContent, onEnt, webSearch } from '../../utils/web-search'
import { ModeSelect } from './chat-input/ModeSelect' // Start of new group import { ModeSelect } from './chat-input/ModeSelect'; // Start of new group
import PromptInputWithActions, { ChatUserInputRef } from './chat-input/PromptInputWithActions' import PromptInputWithActions, { ChatUserInputRef } from './chat-input/PromptInputWithActions'
import { editorStateToPlainText } from './chat-input/utils/editor-state-to-plain-text' import { editorStateToPlainText } from './chat-input/utils/editor-state-to-plain-text'
import { ChatHistory } from './ChatHistoryView' import { ChatHistory } from './ChatHistoryView'
import CommandsView from './CommandsView' import CommandsView from './CommandsView'
import CustomModeView from './CustomModeView' import CustomModeView from './CustomModeView'
import FileReadResults from './FileReadResults'
import HelloInfo from './HelloInfo' import HelloInfo from './HelloInfo'
import McpHubView from './McpHubView' // Moved after MarkdownReasoningBlock import MarkdownReasoningBlock from './Markdown/MarkdownReasoningBlock'
import McpHubView from './McpHubView'; // Moved after MarkdownReasoningBlock
import QueryProgress, { QueryProgressState } from './QueryProgress' import QueryProgress, { QueryProgressState } from './QueryProgress'
import ReactMarkdown from './ReactMarkdown' import ReactMarkdown from './ReactMarkdown'
import SimilaritySearchResults from './SimilaritySearchResults' import SimilaritySearchResults from './SimilaritySearchResults'
import FileReadResults from './FileReadResults'
import WebsiteReadResults from './WebsiteReadResults' import WebsiteReadResults from './WebsiteReadResults'
import MarkdownReasoningBlock from './Markdown/MarkdownReasoningBlock'
// Add an empty line here // Add an empty line here
const getNewInputMessage = (app: App, defaultMention: string): ChatUserMessage => { const getNewInputMessage = (app: App, defaultMention: string): ChatUserMessage => {
@ -581,7 +581,7 @@ const Chat = forwardRef<ChatRef, ChatProps>((props, ref) => {
if (!opFile) { if (!opFile) {
throw new Error(`File not found: ${toolArgs.filepath}`) throw new Error(`File not found: ${toolArgs.filepath}`)
} }
const fileContent = await readTFileContent(opFile, app.vault, app) const fileContent = await readTFileContentPdf(opFile, app.vault, app)
const formattedContent = `[read_file for '${toolArgs.filepath}'] Result:\n${addLineNumbers(fileContent)}\n`; const formattedContent = `[read_file for '${toolArgs.filepath}'] Result:\n${addLineNumbers(fileContent)}\n`;
return { return {
type: 'read_file', type: 'read_file',

View File

@ -88,4 +88,4 @@ export async function matchSearchUsingCorePlugin(
console.error("Error during core plugin processing:", error); console.error("Error during core plugin processing:", error);
return "An error occurred during the search."; return "An error occurred during the search.";
} }
} }

View File

@ -8,7 +8,7 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
try { try {
// 使用 Obsidian 内置的 PDF.js // 使用 Obsidian 内置的 PDF.js
const pdfjsLib = await loadPdfJs() const pdfjsLib = await loadPdfJs()
// Read PDF file as binary buffer // Read PDF file as binary buffer
const pdfBuffer = await app.vault.readBinary(file) const pdfBuffer = await app.vault.readBinary(file)
@ -26,7 +26,9 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
fullText += pageText + '\n\n' fullText += pageText + '\n\n'
} }
return fullText || '(Empty PDF content)' // 清理null字节防止PostgreSQL UTF8编码错误
const cleanText = (fullText || '(Empty PDF content)').replace(/\0/g, '')
return cleanText
} catch (error: any) { } catch (error: any) {
console.error('Error parsing PDF:', error) console.error('Error parsing PDF:', error)
return `(Error reading PDF file: ${error?.message || 'Unknown error'})` return `(Error reading PDF file: ${error?.message || 'Unknown error'})`
@ -36,27 +38,42 @@ export async function parsePdfContent(file: TFile, app: App): Promise<string> {
export async function readTFileContent( export async function readTFileContent(
file: TFile, file: TFile,
vault: Vault, vault: Vault,
): Promise<string> {
if (file.extension != 'md') {
return "(Binary file, unable to display content)"
}
const content = await vault.cachedRead(file)
// 清理null字节防止PostgreSQL UTF8编码错误
return content.replace(/\0/g, '')
}
export async function readTFileContentPdf(
file: TFile,
vault: Vault,
app?: App, app?: App,
): Promise<string> { ): Promise<string> {
if (file.extension === 'pdf') { if (file.extension === 'pdf') {
if (app) { if (app) {
return await parsePdfContent(file, app) const content = await parsePdfContent(file, app)
// 清理null字节防止PostgreSQL UTF8编码错误
return content.replace(/\0/g, '')
} }
return "(PDF file, app context required for processing)" return "(PDF file, app context required for processing)"
} }
if (file.extension != 'md') { if (file.extension != 'md') {
return "(Binary file, unable to display content)" return "(Binary file, unable to display content)"
} }
return await vault.cachedRead(file) const content = await vault.cachedRead(file)
// 清理null字节防止PostgreSQL UTF8编码错误
return content.replace(/\0/g, '')
} }
export async function readMultipleTFiles( export async function readMultipleTFiles(
files: TFile[], files: TFile[],
vault: Vault, vault: Vault
app?: App,
): Promise<string[]> { ): Promise<string[]> {
// Read files in parallel // Read files in parallel
const readPromises = files.map((file) => readTFileContent(file, vault, app)) const readPromises = files.map((file) => readTFileContent(file, vault))
return await Promise.all(readPromises) return await Promise.all(readPromises)
} }

View File

@ -87,7 +87,7 @@ async function getFileOrFolderContent(
if (path.extension != 'md') { if (path.extension != 'md') {
return "(Binary file, unable to display content)" return "(Binary file, unable to display content)"
} }
return addLineNumbers(await readTFileContent(path, vault, app)) return addLineNumbers(await readTFileContent(path, vault))
} else if (path instanceof TFolder) { } else if (path instanceof TFolder) {
const entries = path.children const entries = path.children
let folderContent = "" let folderContent = ""
@ -111,7 +111,7 @@ async function getFileOrFolderContent(
if (entry.extension != 'md') { if (entry.extension != 'md') {
return undefined return undefined
} }
const content = addLineNumbers(await readTFileContent(entry, vault, app)) const content = addLineNumbers(await readTFileContent(entry, vault))
return `<file_content path="${entry.path}">\n${content}\n</file_content>` return `<file_content path="${entry.path}">\n${content}\n</file_content>`
} catch (error) { } catch (error) {
return undefined return undefined
@ -883,7 +883,7 @@ ${customInstruction}
private async getCurrentFileMessage( private async getCurrentFileMessage(
currentFile: TFile, currentFile: TFile,
): Promise<RequestMessage> { ): Promise<RequestMessage> {
const fileContent = await readTFileContent(currentFile, this.app.vault, this.app) const fileContent = await readTFileContent(currentFile, this.app.vault)
return { return {
role: 'user', role: 'user',
content: `# Inputs content: `# Inputs
@ -905,7 +905,7 @@ ${fileContent}
return null; return null;
} }
const fileContent = await readTFileContent(currentFile, this.app.vault, this.app); const fileContent = await readTFileContent(currentFile, this.app.vault);
const lines = fileContent.split('\n'); const lines = fileContent.split('\n');
// 计算上下文范围,并处理边界情况 // 计算上下文范围,并处理边界情况