infio-copilot/src/core/llm/openai-message-adapter.ts

import OpenAI from 'openai'
import {
	ChatCompletion,
	ChatCompletionChunk,
	ChatCompletionContentPart,
	ChatCompletionMessageParam,
} from 'openai/resources/chat/completions'
import {
	LLMOptions,
	LLMRequestNonStreaming,
	LLMRequestStreaming,
	RequestMessage,
} from '../../types/llm/request'
import {
	LLMResponseNonStreaming,
	LLMResponseStreaming,
} from '../../types/llm/response'
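
/**
 * Adapter between the plugin's LLM request/response types and the OpenAI
 * chat.completions API. The configured OpenAI client is supplied by the
 * caller, so any OpenAI-compatible endpoint can be used through it.
 */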
export class OpenAIMessageAdapter {
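	/**
	 * Sends a non-streaming chat.completions request and maps the SDK response
	 * onto LLMResponseNonStreaming. extraParams is spread last, so it can
	 * override any of the mapped request fields.
	 */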
	async generateResponse(
		client: OpenAI,
		request: LLMRequestNonStreaming,
		options?: LLMOptions,
		extraParams?: Record<string, any>,
	): Promise<LLMResponseNonStreaming> {
		const response = await client.chat.completions.create(
			{
				model: request.model,
				messages: request.messages.map((m) =>
					OpenAIMessageAdapter.parseRequestMessage(m),
				),
				max_tokens: request.max_tokens,
				temperature: request.temperature,
				top_p: request.top_p,
				frequency_penalty: request.frequency_penalty,
				presence_penalty: request.presence_penalty,
				logit_bias: request.logit_bias,
				prediction: request.prediction,
				...extraParams,
			},
			{
				signal: options?.signal,
			},
		)
		return OpenAIMessageAdapter.parseNonStreamingResponse(response)
	}
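
	/**
	 * Sends a streaming chat.completions request (with usage reported in the
	 * final chunk via stream_options) and returns an async iterable that maps
	 * each SDK chunk onto LLMResponseStreaming.
	 */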
	async streamResponse(
		client: OpenAI,
		request: LLMRequestStreaming,
		options?: LLMOptions,
		extraParams?: Record<string, any>,
	): Promise<AsyncIterable<LLMResponseStreaming>> {
		const stream = await client.chat.completions.create(
			{
				model: request.model,
				messages: request.messages.map((m) =>
					OpenAIMessageAdapter.parseRequestMessage(m),
				),
				max_completion_tokens: request.max_tokens,
				temperature: request.temperature,
				top_p: request.top_p,
				frequency_penalty: request.frequency_penalty,
				presence_penalty: request.presence_penalty,
				logit_bias: request.logit_bias,
				stream: true,
				stream_options: {
					include_usage: true,
				},
				...extraParams,
			},
			{
				signal: options?.signal,
			},
		)

		// eslint-disable-next-line no-inner-declarations
		async function* streamResponse(): AsyncIterable<LLMResponseStreaming> {
			for await (const chunk of stream) {
				yield OpenAIMessageAdapter.parseStreamingResponseChunk(chunk)
			}
		}

		return streamResponse()
	}
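
	/**
	 * Converts the plugin's RequestMessage into the SDK's
	 * ChatCompletionMessageParam. User content may be a string or an array of
	 * text/image parts; assistant and system content must be plain strings.
	 */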
	static parseRequestMessage(
		message: RequestMessage,
	): ChatCompletionMessageParam {
		switch (message.role) {
			case 'user': {
				const content = Array.isArray(message.content)
					? message.content.map((part): ChatCompletionContentPart => {
						switch (part.type) {
							case 'text':
								return { type: 'text', text: part.text }
							case 'image_url':
								return { type: 'image_url', image_url: part.image_url }
						}
					})
					: message.content
				return { role: 'user', content }
			}
			case 'assistant': {
				if (Array.isArray(message.content)) {
					throw new Error('Assistant message should be a string')
				}
				return { role: 'assistant', content: message.content }
			}
			case 'system': {
				if (Array.isArray(message.content)) {
					throw new Error('System message should be a string')
				}
				return { role: 'system', content: message.content }
			}
		}
	}
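
	/**
	 * Maps the SDK ChatCompletion onto LLMResponseNonStreaming, lifting a
	 * provider-specific reasoning field (reasoning_content or reasoning) into
	 * reasoning_content when one is present.
	 */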
	static parseNonStreamingResponse(
		response: ChatCompletion,
	): LLMResponseNonStreaming {
		return {
			id: response.id,
			choices: response.choices.map((choice) => ({
				finish_reason: choice.finish_reason,
				message: {
					content: choice.message.content,
					reasoning_content: 'reasoning_content' in choice.message
						? (choice.message.reasoning_content as string)
						: ('reasoning' in choice.message ? (choice.message.reasoning as string) : null),
					role: choice.message.role,
				},
			})),
			created: response.created,
			model: response.model,
			object: 'chat.completion',
			system_fingerprint: response.system_fingerprint,
			usage: response.usage,
		}
	}
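
	/**
	 * Maps a single SDK ChatCompletionChunk onto LLMResponseStreaming,
	 * including any reasoning delta exposed as reasoning_content or reasoning.
	 */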
	static parseStreamingResponseChunk(
		chunk: ChatCompletionChunk,
	): LLMResponseStreaming {
		return {
			id: chunk.id,
			choices: chunk.choices.map((choice) => ({
				finish_reason: choice.finish_reason ?? null,
				delta: {
					content: choice.delta.content ?? null,
					reasoning_content: 'reasoning_content' in choice.delta
						? (choice.delta.reasoning_content as string)
						: ('reasoning' in choice.delta ? (choice.delta.reasoning as string) : null),
					role: choice.delta.role,
				},
			})),
			created: chunk.created,
			model: chunk.model,
			object: 'chat.completion.chunk',
			system_fingerprint: chunk.system_fingerprint,
			usage: chunk.usage ?? undefined,
		}
	}
}
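
/*
 * Usage sketch (illustrative only, not part of this module). A caller is
 * expected to construct its own OpenAI client and delegate to the adapter.
 * The API key handling, model name, and request fields shown below are
 * assumptions for the example, not values taken from this file.
 *
 *   const adapter = new OpenAIMessageAdapter()
 *   const client = new OpenAI({ apiKey: '<your-api-key>' })
 *   const response = await adapter.generateResponse(client, {
 *     model: 'gpt-4o-mini',
 *     messages: [{ role: 'user', content: 'Hello' }],
 *   } as LLMRequestNonStreaming)
 *   console.log(response.choices[0].message.content)
 */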