infio-copilot-dev/src/utils/parse-infio-block.ts
2025-04-01 18:56:11 +08:00

580 lines
17 KiB
TypeScript

import JSON5 from 'json5'
import { parseFragment } from 'parse5'
export type ParsedMsgBlock =
| {
type: 'string'
content: string
}
| {
type: 'think'
content: string
} | {
type: 'thinking'
content: string
} | {
type: 'write_to_file'
path: string
content: string
lineCount?: number
} | {
type: 'insert_content'
path: string
startLine: number
content: string
} | {
type: 'read_file'
path: string
finish: boolean
} | {
type: 'attempt_completion'
result: string
finish: boolean
} | {
type: 'search_and_replace'
path: string
content: string
operations: {
search: string
replace: string
start_line?: number
end_line?: number
use_regex?: boolean
ignore_case?: boolean
regex_flags?: string
}[]
finish: boolean
} | {
type: 'apply_diff'
path: string
diff: string
finish: boolean
} | {
type: 'ask_followup_question'
question: string,
finish: boolean
} | {
type: 'list_files'
path: string
recursive?: boolean
finish: boolean
} | {
type: 'regex_search_files'
path: string
regex: string
finish: boolean
} | {
type: 'semantic_search_files'
path: string
query: string
finish: boolean
} | {
type: 'search_web'
query: string
finish: boolean
} | {
type: 'fetch_urls_content'
urls: string[]
finish: boolean
} | {
type: 'switch_mode'
mode: string
reason: string
finish: boolean
}
export function parseMsgBlocks(
input: string,
): ParsedMsgBlock[] {
try {
const parsedResult: ParsedMsgBlock[] = []
const fragment = parseFragment(input, {
sourceCodeLocationInfo: true,
})
let lastEndOffset = 0
for (const node of fragment.childNodes) {
if (node.nodeName === 'thinking') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
const children = node.childNodes
if (children.length === 0) {
parsedResult.push({
type: 'thinking',
content: '',
})
} else {
const innerContentStartOffset =
children[0].sourceCodeLocation?.startOffset
const innerContentEndOffset =
children[children.length - 1].sourceCodeLocation?.endOffset
if (!innerContentStartOffset || !innerContentEndOffset) {
throw new Error('sourceCodeLocation is undefined')
}
parsedResult.push({
type: 'thinking',
content: input.slice(innerContentStartOffset, innerContentEndOffset),
})
}
lastEndOffset = endOffset
} else if (node.nodeName === 'think') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
const children = node.childNodes
if (children.length === 0) {
parsedResult.push({
type: 'think',
content: '',
})
} else {
const innerContentStartOffset =
children[0].sourceCodeLocation?.startOffset
const innerContentEndOffset =
children[children.length - 1].sourceCodeLocation?.endOffset
if (!innerContentStartOffset || !innerContentEndOffset) {
throw new Error('sourceCodeLocation is undefined')
}
parsedResult.push({
type: 'think',
content: input.slice(innerContentStartOffset, innerContentEndOffset),
})
}
lastEndOffset = endOffset
} else if (node.nodeName === 'list_files') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let recursive: boolean | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'recursive' && childNode.childNodes.length > 0) {
const recursiveValue = childNode.childNodes[0].value
recursive = recursiveValue ? recursiveValue.toLowerCase() === 'true' : false
}
}
parsedResult.push({
type: 'list_files',
path: path || '/',
recursive,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'read_file') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'read_file',
path,
// Check if the tag is completely parsed with proper closing tag
// In parse5, when a tag is properly closed, its sourceCodeLocation will include endTag
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'regex_search_files') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let regex: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'regex' && childNode.childNodes.length > 0) {
regex = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'regex_search_files',
path: path,
regex: regex,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'semantic_search_files') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let query: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'query' && childNode.childNodes.length > 0) {
query = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'semantic_search_files',
path: path,
query: query,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'write_to_file') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let content: string = ''
let lineCount: number | undefined
// 处理子标签
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'content' && childNode.childNodes.length > 0) {
// 如果内容有多个子节点,需要合并它们
content = childNode.childNodes.map(n => n.value || '').join('')
} else if (childNode.nodeName === 'line_count' && childNode.childNodes.length > 0) {
const lineCountStr = childNode.childNodes[0].value
lineCount = lineCountStr ? parseInt(lineCountStr) : undefined
}
}
parsedResult.push({
type: 'write_to_file',
content,
path,
lineCount
})
lastEndOffset = endOffset
} else if (node.nodeName === 'insert_content') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let content: string = ''
let startLine: number = 0
// 处理子标签
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'operations' && childNode.childNodes.length > 0) {
try {
const operationsJson = childNode.childNodes[0].value
const operations = JSON5.parse(operationsJson)
if (Array.isArray(operations) && operations.length > 0) {
const operation = operations[0]
startLine = operation.start_line || 1
content = operation.content || ''
}
} catch (error) {
console.error('Failed to parse operations JSON', error)
}
}
}
parsedResult.push({
type: 'insert_content',
path,
startLine,
content
})
lastEndOffset = endOffset
} else if (node.nodeName === 'search_and_replace') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let operations = []
let content: string = ''
// 处理子标签
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'operations' && childNode.childNodes.length > 0) {
try {
content = childNode.childNodes[0].value
operations = JSON5.parse(content)
} catch (error) {
console.error('Failed to parse operations JSON', error)
}
}
}
parsedResult.push({
type: 'search_and_replace',
path,
content,
operations,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'apply_diff') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let path: string | undefined
let diff: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'path' && childNode.childNodes.length > 0) {
path = childNode.childNodes[0].value
} else if (childNode.nodeName === 'diff' && childNode.childNodes.length > 0) {
diff = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'apply_diff',
path,
diff,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'attempt_completion') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let result: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'result' && childNode.childNodes.length > 0) {
result = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'attempt_completion',
result,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'ask_followup_question') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let question: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'question' && childNode.childNodes.length > 0) {
question = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'ask_followup_question',
question,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'switch_mode') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let mode: string = ''
let reason: string = ''
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'mode_slug' && childNode.childNodes.length > 0) {
// @ts-ignore - 忽略 value 属性的类型错误
mode = childNode.childNodes[0].value
} else if (childNode.nodeName === 'reason' && childNode.childNodes.length > 0) {
// @ts-ignore - 忽略 value 属性的类型错误
reason = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'switch_mode',
mode,
reason,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'search_web') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let query: string | undefined
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'query' && childNode.childNodes.length > 0) {
query = childNode.childNodes[0].value
}
}
parsedResult.push({
type: 'search_web',
query: query || '',
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'fetch_urls_content') {
if (!node.sourceCodeLocation) {
throw new Error('sourceCodeLocation is undefined')
}
const startOffset = node.sourceCodeLocation.startOffset
const endOffset = node.sourceCodeLocation.endOffset
if (startOffset > lastEndOffset) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset, startOffset),
})
}
let urls: string[] = []
for (const childNode of node.childNodes) {
if (childNode.nodeName === 'urls' && childNode.childNodes.length > 0) {
try {
const urlsJson = childNode.childNodes[0].value
const parsedUrls = JSON5.parse(urlsJson)
if (Array.isArray(parsedUrls)) {
urls = parsedUrls
}
} catch (error) {
// console.error('Failed to parse URLs JSON', error)
}
}
}
parsedResult.push({
type: 'fetch_urls_content',
urls,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
}
}
// handle the last part of the input
if (lastEndOffset < input.length) {
parsedResult.push({
type: 'string',
content: input.slice(lastEndOffset),
})
}
return parsedResult
} catch (error) {
console.error('Failed to parse infio block', error)
throw error
}
}