// Parses tagged message text (e.g. <thinking>, <read_file>, <write_to_file>) into typed ParsedMsgBlock entries.
import JSON5 from 'json5'
|
|
import { parseFragment } from 'parse5'
|
|
|
|
/**
 * One segment of a parsed message, discriminated by `type`.
 *
 * `string` carries text copied verbatim from the input; every other variant
 * corresponds to a tag of the same name. On variants that have it, `finish`
 * records whether the tag's closing tag was present in the input.
 */
export type ParsedMsgBlock =
	| {
		/** Text that is not part of any recognized tag. */
		type: 'string'
		content: string
	}
	| {
		type: 'think'
		/** Raw source text found between the opening and closing tag. */
		content: string
	}
	| {
		type: 'thinking'
		/** Raw source text found between the opening and closing tag. */
		content: string
	}
	| {
		type: 'write_to_file'
		path: string
		content: string
		lineCount?: number
	}
	| {
		type: 'insert_content'
		path: string
		startLine: number
		content: string
	}
	| {
		type: 'read_file'
		path: string
		finish: boolean
	}
	| {
		type: 'attempt_completion'
		result: string
		finish: boolean
	}
	| {
		type: 'search_and_replace'
		path: string
		/** Raw text of the `operations` tag, before it is parsed. */
		content: string
		operations: {
			search: string
			replace: string
			start_line?: number
			end_line?: number
			use_regex?: boolean
			ignore_case?: boolean
			regex_flags?: string
		}[]
		finish: boolean
	}
	| {
		type: 'apply_diff'
		path: string
		diff: string
		finish: boolean
	}
	| {
		type: 'ask_followup_question'
		question: string
		finish: boolean
	}
	| {
		type: 'list_files'
		path: string
		recursive?: boolean
		finish: boolean
	}
	| {
		type: 'regex_search_files'
		path: string
		regex: string
		finish: boolean
	}
	| {
		type: 'semantic_search_files'
		path: string
		query: string
		finish: boolean
	}
	| {
		type: 'search_web'
		query: string
		finish: boolean
	}
	| {
		type: 'fetch_urls_content'
		urls: string[]
		finish: boolean
	}
	| {
		type: 'switch_mode'
		mode: string
		reason: string
		finish: boolean
	}
|
|
|
|
/** Source offsets reported by the parser for a node; `endTag` is only set when a closing tag was seen. */
type MsgNodeLocation = {
	startOffset: number
	endOffset: number
	endTag?: unknown
}

/** Minimal structural view of the parsed tree nodes that this parser reads. */
type MsgNode = {
	nodeName: string
	value?: string
	childNodes?: MsgNode[]
	sourceCodeLocation?: MsgNodeLocation | null
}

/** Top-level tag names that are converted into dedicated blocks. */
const RECOGNIZED_TAGS = [
	'thinking',
	'think',
	'list_files',
	'read_file',
	'regex_search_files',
	'semantic_search_files',
	'write_to_file',
	'insert_content',
	'search_and_replace',
	'apply_diff',
	'attempt_completion',
	'ask_followup_question',
	'switch_mode',
	'search_web',
	'fetch_urls_content',
] as const

type RecognizedTag = (typeof RECOGNIZED_TAGS)[number]

/** Narrows an arbitrary node name to one of the recognized tag names. */
function isRecognizedTag(name: string): name is RecognizedTag {
	return (RECOGNIZED_TAGS as readonly string[]).includes(name)
}

/** Returns, in document order, the children of `parent` named `tagName` that have at least one child node. */
function filledChildren(parent: MsgNode, tagName: string): MsgNode[] {
	return (parent.childNodes ?? []).filter(
		(child) => child.nodeName === tagName && (child.childNodes ?? []).length > 0,
	)
}

/** Returns the last non-empty child named `tagName`, so that a repeated tag overrides earlier ones. */
function lastFilledChild(parent: MsgNode, tagName: string): MsgNode | undefined {
	const matches = filledChildren(parent, tagName)
	return matches[matches.length - 1]
}

/** Returns the `value` of the first child node, or undefined when that child carries no text value. */
function leadingText(element: MsgNode): string | undefined {
	return element.childNodes?.[0]?.value
}

/** Returns the leading text of the last non-empty child named `tagName`, if any. */
function childTagText(parent: MsgNode, tagName: string): string | undefined {
	const match = lastFilledChild(parent, tagName)
	return match === undefined ? undefined : leadingText(match)
}

/**
 * Returns the raw input text spanned by the children of `node`
 * (empty string when there are no children).
 */
function rawInnerSource(node: MsgNode, input: string): string {
	const children = node.childNodes ?? []
	if (children.length === 0) {
		return ''
	}
	const innerStart = children[0]?.sourceCodeLocation?.startOffset
	const innerEnd = children[children.length - 1]?.sourceCodeLocation?.endOffset
	// Compare against undefined explicitly: an offset of 0 is a valid position.
	if (innerStart === undefined || innerEnd === undefined) {
		throw new Error('sourceCodeLocation is undefined')
	}
	return input.slice(innerStart, innerEnd)
}

/** Parses a base-10 line count; returns undefined for missing, empty or non-numeric text. */
function parseLineCount(text: string | undefined): number | undefined {
	if (!text) {
		return undefined
	}
	const count = Number.parseInt(text, 10)
	return Number.isNaN(count) ? undefined : count
}

/** Builds the block for one recognized top-level tag. `finish` tells whether its closing tag was present. */
function buildToolBlock(
	tag: RecognizedTag,
	node: MsgNode,
	input: string,
	finish: boolean,
): ParsedMsgBlock {
	switch (tag) {
		case 'thinking':
			return { type: 'thinking', content: rawInnerSource(node, input) }
		case 'think':
			return { type: 'think', content: rawInnerSource(node, input) }
		case 'list_files': {
			const recursiveNode = lastFilledChild(node, 'recursive')
			return {
				type: 'list_files',
				path: childTagText(node, 'path') || '/',
				// Stays undefined when the tag is absent; otherwise true only for the text "true" (any case).
				recursive: recursiveNode === undefined
					? undefined
					: leadingText(recursiveNode)?.toLowerCase() === 'true',
				finish,
			}
		}
		case 'read_file':
			return { type: 'read_file', path: childTagText(node, 'path') ?? '', finish }
		case 'regex_search_files':
			return {
				type: 'regex_search_files',
				path: childTagText(node, 'path') ?? '',
				regex: childTagText(node, 'regex') ?? '',
				finish,
			}
		case 'semantic_search_files':
			return {
				type: 'semantic_search_files',
				path: childTagText(node, 'path') ?? '',
				query: childTagText(node, 'query') ?? '',
				finish,
			}
		case 'write_to_file': {
			const contentNode = lastFilledChild(node, 'content')
			// The content may be split over several child nodes, so their text values are concatenated.
			// NOTE(review): children without a text value (e.g. nested elements) contribute nothing.
			const content = (contentNode?.childNodes ?? []).map((n) => n.value || '').join('')
			return {
				type: 'write_to_file',
				content,
				path: childTagText(node, 'path') ?? '',
				lineCount: parseLineCount(childTagText(node, 'line_count')),
			}
		}
		case 'insert_content': {
			let startLine = 0
			let content = ''
			for (const operationsNode of filledChildren(node, 'operations')) {
				const rawOperations = leadingText(operationsNode)
				if (rawOperations === undefined) {
					continue
				}
				try {
					const parsed: unknown = JSON5.parse(rawOperations)
					// Only the first operation of the array is used.
					const firstOperation: unknown = Array.isArray(parsed) ? parsed[0] : undefined
					if (typeof firstOperation === 'object' && firstOperation !== null) {
						const { start_line: rawStartLine, content: rawContent } =
							firstOperation as { start_line?: unknown; content?: unknown }
						const numericStartLine = Number(rawStartLine)
						// Missing, zero or non-numeric start lines fall back to line 1.
						startLine = Number.isFinite(numericStartLine) && numericStartLine !== 0
							? numericStartLine
							: 1
						content = typeof rawContent === 'string' ? rawContent : ''
					}
				} catch (error) {
					console.error('Failed to parse operations JSON', error)
				}
			}
			return {
				type: 'insert_content',
				path: childTagText(node, 'path') ?? '',
				startLine,
				content,
			}
		}
		case 'search_and_replace': {
			let content = ''
			let operations: Extract<ParsedMsgBlock, { type: 'search_and_replace' }>['operations'] = []
			for (const operationsNode of filledChildren(node, 'operations')) {
				const rawOperations = leadingText(operationsNode)
				if (rawOperations === undefined) {
					continue
				}
				// Keep the raw text even when it does not parse, so callers still receive it.
				content = rawOperations
				try {
					const parsed: unknown = JSON5.parse(rawOperations)
					// Only arrays are accepted; individual entries are not validated further.
					operations = Array.isArray(parsed) ? parsed : []
				} catch (error) {
					console.error('Failed to parse operations JSON', error)
				}
			}
			return {
				type: 'search_and_replace',
				path: childTagText(node, 'path') ?? '',
				content,
				operations,
				finish,
			}
		}
		case 'apply_diff':
			return {
				type: 'apply_diff',
				path: childTagText(node, 'path') ?? '',
				diff: childTagText(node, 'diff') ?? '',
				finish,
			}
		case 'attempt_completion':
			return {
				type: 'attempt_completion',
				result: childTagText(node, 'result') ?? '',
				finish,
			}
		case 'ask_followup_question':
			return {
				type: 'ask_followup_question',
				question: childTagText(node, 'question') ?? '',
				finish,
			}
		case 'switch_mode':
			return {
				type: 'switch_mode',
				// The mode is read from the <mode_slug> child tag.
				mode: childTagText(node, 'mode_slug') ?? '',
				reason: childTagText(node, 'reason') ?? '',
				finish,
			}
		case 'search_web':
			return { type: 'search_web', query: childTagText(node, 'query') ?? '', finish }
		case 'fetch_urls_content': {
			let urls: string[] = []
			for (const urlsNode of filledChildren(node, 'urls')) {
				const rawUrls = leadingText(urlsNode)
				if (rawUrls === undefined) {
					continue
				}
				try {
					const parsed: unknown = JSON5.parse(rawUrls)
					if (Array.isArray(parsed)) {
						urls = parsed.filter((url): url is string => typeof url === 'string')
					}
				} catch {
					// Best effort: an unparsable URL list keeps the previous value and is not logged.
				}
			}
			return { type: 'fetch_urls_content', urls, finish }
		}
		default: {
			const unhandled: never = tag
			throw new Error(`Unhandled tag: ${String(unhandled)}`)
		}
	}
}

/**
 * Splits `input` into an ordered list of blocks.
 *
 * The input is parsed as an HTML fragment with source locations enabled. Each
 * top-level element whose name is a recognized tag becomes a block of that
 * type; all other input (plain text and unrecognized elements) is copied
 * verbatim into `string` blocks, so concatenating the covered source ranges
 * reproduces the input order.
 *
 * Missing text fields default to `''` (`list_files.path` defaults to `'/'`).
 * `finish` is true when the parser reported a closing tag for the element.
 *
 * @param input - Message text that may contain recognized tags.
 * @returns The blocks in the order they appear in `input`.
 * @throws Error when a recognized node lacks source location information;
 *   any error is logged and then rethrown.
 */
export function parseMsgBlocks(
	input: string,
): ParsedMsgBlock[] {
	try {
		const parsedResult: ParsedMsgBlock[] = []
		const fragment = parseFragment(input, {
			sourceCodeLocationInfo: true,
		})
		const topLevelNodes: readonly MsgNode[] = fragment.childNodes
		let lastEndOffset = 0

		for (const node of topLevelNodes) {
			const tag = node.nodeName
			if (!isRecognizedTag(tag)) {
				// Unrecognized nodes are left in place and end up inside the next 'string' block.
				continue
			}
			const location = node.sourceCodeLocation
			if (!location) {
				throw new Error('sourceCodeLocation is undefined')
			}
			if (location.startOffset > lastEndOffset) {
				parsedResult.push({
					type: 'string',
					content: input.slice(lastEndOffset, location.startOffset),
				})
			}
			// A properly closed tag has an `endTag` entry in its source location.
			const finish = location.endTag !== undefined
			parsedResult.push(buildToolBlock(tag, node, input, finish))
			lastEndOffset = location.endOffset
		}

		// Emit whatever follows the last recognized tag.
		if (lastEndOffset < input.length) {
			parsedResult.push({
				type: 'string',
				content: input.slice(lastEndOffset),
			})
		}
		return parsedResult
	} catch (error) {
		console.error('Failed to parse infio block', error)
		throw error
	}
}
|