update LLM models

commit 120c442274 (parent 48b95ea416)
@@ -56,6 +56,7 @@
 		"@codemirror/lang-markdown": "^6.3.2",
 		"@codemirror/merge": "^6.10.0",
 		"@electric-sql/pglite": "0.2.14",
+		"@google/genai": "^1.2.0",
 		"@google/generative-ai": "^0.21.0",
 		"@langchain/core": "^0.3.26",
 		"@lexical/clipboard": "^0.17.1",
pnpm-lock.yaml (generated, 765 lines changed)
File diff suppressed because it is too large.
@@ -33,7 +33,7 @@ export const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'
 export const GROK_BASE_URL = 'https://api.x.ai/v1'
 export const SILICONFLOW_BASE_URL = 'https://api.siliconflow.cn/v1'
 export const ALIBABA_QWEN_BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
-export const INFIO_BASE_URL = 'https://api.infio.com/api/raw_message'
+export const INFIO_BASE_URL = 'https://api.infio.app'
 export const JINA_BASE_URL = 'https://r.jina.ai'
 export const SERPER_BASE_URL = 'https://serpapi.com/search'
 // Pricing in dollars per million tokens
@@ -1,11 +1,11 @@
 import {
 	Content,
-	EnhancedGenerateContentResponse,
-	GenerateContentResult,
-	GenerateContentStreamResult,
-	GoogleGenerativeAI,
+	GoogleGenAI,
 	Part,
-} from '@google/generative-ai'
+	type GenerateContentConfig,
+	type GenerateContentParameters,
+	type GenerateContentResponse,
+} from "@google/genai"
 import { LLMModel } from '../../types/llm/model'
 import {
@@ -18,6 +18,12 @@ import {
 	LLMResponseNonStreaming,
 	LLMResponseStreaming,
 } from '../../types/llm/response'
+import {
+	GeminiModelId,
+	ModelInfo,
+	geminiDefaultModelId,
+	geminiModels
+} from "../../utils/api"
 import { parseImageDataUrl } from '../../utils/image'

 import { BaseLLMProvider } from './base'
@@ -34,12 +40,41 @@ import {
  * issues are resolved.
  */
 export class GeminiProvider implements BaseLLMProvider {
-	private client: GoogleGenerativeAI
+	private client: GoogleGenAI
 	private apiKey: string
+	private baseUrl: string

-	constructor(apiKey: string) {
+	constructor(apiKey: string, baseUrl?: string) {
 		this.apiKey = apiKey
-		this.client = new GoogleGenerativeAI(apiKey)
+		this.baseUrl = baseUrl
+		this.client = new GoogleGenAI({ apiKey })
+	}
+
+	getModel(modelId: string) {
+		let id = modelId
+		let info: ModelInfo = geminiModels[id as GeminiModelId]
+
+		if (id?.endsWith(":thinking")) {
+			id = id.slice(0, -":thinking".length)
+
+			if (geminiModels[id as GeminiModelId]) {
+				info = geminiModels[id as GeminiModelId]
+
+				return {
+					id,
+					info,
+					thinkingConfig: undefined,
+					maxOutputTokens: info.maxTokens ?? undefined,
+				}
+			}
+		}
+
+		if (!info) {
+			id = geminiDefaultModelId
+			info = geminiModels[geminiDefaultModelId]
+		}
+
+		return { id, info }
 	}

 	async generateResponse(
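
Note: the getModel() helper added above resolves the virtual ":thinking" model ids before any API call is made. A minimal sketch of the expected behavior (illustrative only, assuming an apiKey string is in scope; not part of the diff):

	const provider = new GeminiProvider(apiKey)
	provider.getModel("gemini-2.5-flash-preview-05-20:thinking")
	// -> { id: "gemini-2.5-flash-preview-05-20", info: geminiModels["gemini-2.5-flash-preview-05-20"], thinkingConfig: undefined, maxOutputTokens: 65535 }
	provider.getModel("not-a-known-model")
	// -> falls back to { id: geminiDefaultModelId, info: geminiModels[geminiDefaultModelId] }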
@@ -53,6 +88,8 @@ export class GeminiProvider implements BaseLLMProvider {
 			)
 		}

+		const { id: modelName, thinkingConfig, maxOutputTokens, info } = this.getModel(model.modelId)
+
 		const systemMessages = request.messages.filter((m) => m.role === 'system')
 		const systemInstruction: string | undefined =
 			systemMessages.length > 0
@@ -60,30 +97,26 @@
 				: undefined

 		try {
-			const model = this.client.getGenerativeModel({
-				model: request.model,
-				generationConfig: {
-					maxOutputTokens: request.max_tokens,
-					temperature: request.temperature,
-					topP: request.top_p,
-					presencePenalty: request.presence_penalty,
-					frequencyPenalty: request.frequency_penalty,
-				},
-				systemInstruction: systemInstruction,
-			})
-
-			const result = await model.generateContent(
-				{
-					systemInstruction: systemInstruction,
-					contents: request.messages
-						.map((message) => GeminiProvider.parseRequestMessage(message))
-						.filter((m): m is Content => m !== null),
-				},
-				{
-					signal: options?.signal,
-				},
-			)
+			const config: GenerateContentConfig = {
+				systemInstruction,
+				httpOptions: this.baseUrl ? { baseUrl: this.baseUrl } : undefined,
+				thinkingConfig,
+				maxOutputTokens: maxOutputTokens ?? request.max_tokens,
+				temperature: request.temperature ?? 0,
+				topP: request.top_p ?? 1,
+				presencePenalty: request.presence_penalty ?? 0,
+				frequencyPenalty: request.frequency_penalty ?? 0,
+			}
+			const params: GenerateContentParameters = {
+				model: modelName,
+				contents: request.messages
+					.map((message) => GeminiProvider.parseRequestMessage(message))
+					.filter((m): m is Content => m !== null),
+				config,
+			}
+
+			const result = await this.client.models.generateContent(params)
 			const messageId = crypto.randomUUID() // Gemini does not return a message id
 			return GeminiProvider.parseNonStreamingResponse(
 				result,
@@ -115,6 +148,7 @@ export class GeminiProvider implements BaseLLMProvider {
 				`Gemini API key is missing. Please set it in settings menu.`,
 			)
 		}
+		const { id: modelName, thinkingConfig, maxOutputTokens, info } = this.getModel(model.modelId)

 		const systemMessages = request.messages.filter((m) => m.role === 'system')
 		const systemInstruction: string | undefined =
@@ -123,30 +157,25 @@
 				: undefined

 		try {
-			const model = this.client.getGenerativeModel({
-				model: request.model,
-				generationConfig: {
-					maxOutputTokens: request.max_tokens,
-					temperature: request.temperature,
-					topP: request.top_p,
-					presencePenalty: request.presence_penalty,
-					frequencyPenalty: request.frequency_penalty,
-				},
-				systemInstruction: systemInstruction,
-			})
-
-			const stream = await model.generateContentStream(
-				{
-					systemInstruction: systemInstruction,
-					contents: request.messages
-						.map((message) => GeminiProvider.parseRequestMessage(message))
-						.filter((m): m is Content => m !== null),
-				},
-				{
-					signal: options?.signal,
-				},
-			)
+			const config: GenerateContentConfig = {
+				systemInstruction,
+				httpOptions: this.baseUrl ? { baseUrl: this.baseUrl } : undefined,
+				thinkingConfig,
+				maxOutputTokens: maxOutputTokens ?? request.max_tokens,
+				temperature: request.temperature ?? 0,
+				topP: request.top_p ?? 1,
+				presencePenalty: request.presence_penalty ?? 0,
+				frequencyPenalty: request.frequency_penalty ?? 0,
+			}
+			const params: GenerateContentParameters = {
+				model: modelName,
+				contents: request.messages
+					.map((message) => GeminiProvider.parseRequestMessage(message))
+					.filter((m): m is Content => m !== null),
+				config,
+			}
+
+			const stream = await this.client.models.generateContentStream(params)
 			const messageId = crypto.randomUUID() // Gemini does not return a message id
 			return this.streamResponseGenerator(stream, request.model, messageId)
 		} catch (error) {
@@ -165,11 +194,11 @@ export class GeminiProvider implements BaseLLMProvider {
 	}

 	private async *streamResponseGenerator(
-		stream: GenerateContentStreamResult,
+		stream: AsyncGenerator<GenerateContentResponse>,
 		model: string,
 		messageId: string,
 	): AsyncIterable<LLMResponseStreaming> {
-		for await (const chunk of stream.stream) {
+		for await (const chunk of stream) {
 			yield GeminiProvider.parseStreamingResponseChunk(chunk, model, messageId)
 		}
 	}
@@ -215,7 +244,7 @@ export class GeminiProvider implements BaseLLMProvider {
 	}

 	static parseNonStreamingResponse(
-		response: GenerateContentResult,
+		response: GenerateContentResponse,
 		model: string,
 		messageId: string,
 	): LLMResponseNonStreaming {
@@ -224,9 +253,9 @@ export class GeminiProvider implements BaseLLMProvider {
 			choices: [
 				{
 					finish_reason:
-						response.response.candidates?.[0]?.finishReason ?? null,
+						response.candidates?.[0]?.finishReason ?? null,
 					message: {
-						content: response.response.text(),
+						content: response.candidates?.[0]?.content?.parts?.[0]?.text ?? '',
 						role: 'assistant',
 					},
 				},
@@ -234,29 +263,32 @@ export class GeminiProvider implements BaseLLMProvider {
 			created: Date.now(),
 			model: model,
 			object: 'chat.completion',
-			usage: response.response.usageMetadata
+			usage: response.usageMetadata
 				? {
-						prompt_tokens: response.response.usageMetadata.promptTokenCount,
+						prompt_tokens: response.usageMetadata.promptTokenCount,
 						completion_tokens:
-							response.response.usageMetadata.candidatesTokenCount,
-						total_tokens: response.response.usageMetadata.totalTokenCount,
+							response.usageMetadata.candidatesTokenCount,
+						total_tokens: response.usageMetadata.totalTokenCount,
 					}
 				: undefined,
 		}
 	}

 	static parseStreamingResponseChunk(
-		chunk: EnhancedGenerateContentResponse,
+		chunk: GenerateContentResponse,
 		model: string,
 		messageId: string,
 	): LLMResponseStreaming {
+		const firstCandidate = chunk.candidates?.[0]
+		const textContent = firstCandidate?.content?.parts?.[0]?.text || ''
+
 		return {
 			id: messageId,
 			choices: [
 				{
-					finish_reason: chunk.candidates?.[0]?.finishReason ?? null,
+					finish_reason: firstCandidate?.finishReason ?? null,
 					delta: {
-						content: chunk.text(),
+						content: textContent,
 					},
 				},
 			],
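
Note: with the @google/genai SDK the provider now iterates GenerateContentResponse chunks directly. A rough usage sketch of the refactored provider (assumes the plugin's LLMModel and LLMRequestStreaming shapes and a hypothetical proxy URL; illustrative, not part of the diff):

	const gemini = new GeminiProvider(apiKey, "https://my-gemini-proxy.example.com") // baseUrl is optional
	const stream = await gemini.streamResponse(
		{ provider: ApiProvider.Google, modelId: "gemini-2.5-flash-preview-05-20" },
		{ model: "gemini-2.5-flash-preview-05-20", messages: [{ role: "user", content: "hello" }], stream: true },
	)
	for await (const chunk of stream) {
		console.log(chunk.choices[0]?.delta?.content ?? "")
	}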
@@ -1,251 +0,0 @@
-import OpenAI from 'openai'
-import {
-	ChatCompletion,
-	ChatCompletionChunk,
-} from 'openai/resources/chat/completions'
-
-import { INFIO_BASE_URL } from '../../constants'
-import { LLMModel } from '../../types/llm/model'
-import {
-	LLMRequestNonStreaming,
-	LLMRequestStreaming,
-	RequestMessage
-} from '../../types/llm/request'
-import {
-	LLMResponseNonStreaming,
-	LLMResponseStreaming,
-} from '../../types/llm/response'
-
-import { BaseLLMProvider } from './base'
-import {
-	LLMAPIKeyInvalidException,
-	LLMAPIKeyNotSetException,
-} from './exception'
-
-export type RangeFilter = {
-	gte?: number;
-	lte?: number;
-}
-
-export type ChunkFilter = {
-	field: string;
-	match_all?: string[];
-	range?: RangeFilter;
-}
-
-/**
- * Interface for making requests to the Infio API
- */
-export type InfioRequest = {
-	/** Required: The content of the user message to attach to the topic and then generate an assistant message in response to */
-	messages: RequestMessage[];
-	// /** Required: The ID of the topic to attach the message to */
-	// topic_id: string;
-	/** Optional: URLs to include */
-	links?: string[];
-	/** Optional: Files to include */
-	files?: string[];
-	/** Optional: Whether to highlight results in chunk_html. Default is true */
-	highlight_results?: boolean;
-	/** Optional: Delimiters for highlighting citations. Default is [".", "!", "?", "\n", "\t", ","] */
-	highlight_delimiters?: string[];
-	/** Optional: Search type - "semantic", "fulltext", or "hybrid". Default is "hybrid" */
-	search_type?: string;
-	/** Optional: Filters for chunk filtering */
-	filters?: ChunkFilter;
-	/** Optional: Whether to use web search API. Default is false */
-	use_web_search?: boolean;
-	/** Optional: LLM model to use */
-	llm_model?: string;
-	/** Optional: Force source */
-	force_source?: string;
-	/** Optional: Whether completion should come before chunks in stream. Default is false */
-	completion_first?: boolean;
-	/** Optional: Whether to stream the response. Default is true */
-	stream_response?: boolean;
-	/** Optional: Sampling temperature between 0 and 2. Default is 0.5 */
-	temperature?: number;
-	/** Optional: Frequency penalty between -2.0 and 2.0. Default is 0.7 */
-	frequency_penalty?: number;
-	/** Optional: Presence penalty between -2.0 and 2.0. Default is 0.7 */
-	presence_penalty?: number;
-	/** Optional: Maximum tokens to generate */
-	max_tokens?: number;
-	/** Optional: Stop tokens (up to 4 sequences) */
-	stop_tokens?: string[];
-}
-
-export class InfioProvider implements BaseLLMProvider {
-	// private adapter: OpenAIMessageAdapter
-	// private client: OpenAI
-	private apiKey: string
-	private baseUrl: string
-
-	constructor(apiKey: string) {
-		// this.client = new OpenAI({ apiKey, dangerouslyAllowBrowser: true })
-		// this.adapter = new OpenAIMessageAdapter()
-		this.apiKey = apiKey
-		this.baseUrl = INFIO_BASE_URL
-	}
-
-	async generateResponse(
-		model: LLMModel,
-		request: LLMRequestNonStreaming,
-		// options?: LLMOptions,
-	): Promise<LLMResponseNonStreaming> {
-		if (!this.apiKey) {
-			throw new LLMAPIKeyNotSetException(
-				'OpenAI API key is missing. Please set it in settings menu.',
-			)
-		}
-		try {
-			const req: InfioRequest = {
-				messages: request.messages,
-				stream_response: false,
-				temperature: request.temperature,
-				frequency_penalty: request.frequency_penalty,
-				presence_penalty: request.presence_penalty,
-				max_tokens: request.max_tokens,
-			}
-			const req_options = {
-				method: 'POST',
-				headers: {
-					Authorization: this.apiKey,
-					"TR-Dataset": "74aaec22-0cf0-4cba-80a5-ae5c0518344e",
-					'Content-Type': 'application/json'
-				},
-				body: JSON.stringify(req)
-			};
-
-			const response = await fetch(this.baseUrl, req_options);
-			if (!response.ok) {
-				throw new Error(`HTTP error! status: ${response.status}`);
-			}
-			const data = await response.json() as ChatCompletion;
-			return InfioProvider.parseNonStreamingResponse(data);
-		} catch (error) {
-			if (error instanceof OpenAI.AuthenticationError) {
-				throw new LLMAPIKeyInvalidException(
-					'OpenAI API key is invalid. Please update it in settings menu.',
-				)
-			}
-			throw error
-		}
-	}
-
-	async streamResponse(
-		model: LLMModel,
-		request: LLMRequestStreaming,
-	): Promise<AsyncIterable<LLMResponseStreaming>> {
-		if (!this.apiKey) {
-			throw new LLMAPIKeyNotSetException(
-				'OpenAI API key is missing. Please set it in settings menu.',
-			)
-		}
-
-		try {
-			const req: InfioRequest = {
-				llm_model: request.model,
-				messages: request.messages,
-				stream_response: true,
-				temperature: request.temperature,
-				frequency_penalty: request.frequency_penalty,
-				presence_penalty: request.presence_penalty,
-				max_tokens: request.max_tokens,
-			}
-			const req_options = {
-				method: 'POST',
-				headers: {
-					Authorization: this.apiKey,
-					"TR-Dataset": "74aaec22-0cf0-4cba-80a5-ae5c0518344e",
-					"Content-Type": "application/json"
-				},
-				body: JSON.stringify(req)
-			};
-
-			const response = await fetch(this.baseUrl, req_options);
-			if (!response.ok) {
-				throw new Error(`HTTP error! status: ${response.status}`);
-			}
-			if (!response.body) {
-				throw new Error('Response body is null');
-			}
-
-			const reader = response.body.getReader();
-			const decoder = new TextDecoder();
-
-			return {
-				[Symbol.asyncIterator]: async function* () {
-					try {
-						while (true) {
-							const { done, value } = await reader.read();
-							if (done) break;
-
-							const chunk = decoder.decode(value);
-							const lines = chunk.split('\n').filter(line => line.trim());
-
-							for (const line of lines) {
-								if (line.startsWith('data: ')) {
-									const jsonData = JSON.parse(line.slice(6)) as ChatCompletionChunk;
-									if (!jsonData || typeof jsonData !== 'object' || !('choices' in jsonData)) {
-										throw new Error('Invalid chunk format received');
-									}
-									yield InfioProvider.parseStreamingResponseChunk(jsonData);
-								}
-							}
-						}
-					} finally {
-						reader.releaseLock();
-					}
-				}
-			};
-		} catch (error) {
-			if (error instanceof OpenAI.AuthenticationError) {
-				throw new LLMAPIKeyInvalidException(
-					'OpenAI API key is invalid. Please update it in settings menu.',
-				)
-			}
-			throw error
-		}
-	}
-
-	static parseNonStreamingResponse(
-		response: ChatCompletion,
-	): LLMResponseNonStreaming {
-		return {
-			id: response.id,
-			choices: response.choices.map((choice) => ({
-				finish_reason: choice.finish_reason,
-				message: {
-					content: choice.message.content,
-					role: choice.message.role,
-				},
-			})),
-			created: response.created,
-			model: response.model,
-			object: 'chat.completion',
-			system_fingerprint: response.system_fingerprint,
-			usage: response.usage,
-		}
-	}
-
-	static parseStreamingResponseChunk(
-		chunk: ChatCompletionChunk,
-	): LLMResponseStreaming {
-		return {
-			id: chunk.id,
-			choices: chunk.choices.map((choice) => ({
-				finish_reason: choice.finish_reason ?? null,
-				delta: {
-					content: choice.delta.content ?? null,
-					role: choice.delta.role,
-				},
-			})),
-			created: chunk.created,
-			model: chunk.model,
-			object: 'chat.completion.chunk',
-			system_fingerprint: chunk.system_fingerprint,
-			usage: chunk.usage ?? undefined,
-		}
-	}
-}
@@ -1,4 +1,4 @@
-import { ALIBABA_QWEN_BASE_URL, DEEPSEEK_BASE_URL, GROK_BASE_URL, OPENROUTER_BASE_URL, SILICONFLOW_BASE_URL } from '../../constants'
+import { ALIBABA_QWEN_BASE_URL, DEEPSEEK_BASE_URL, GROK_BASE_URL, INFIO_BASE_URL, OPENROUTER_BASE_URL, SILICONFLOW_BASE_URL } from '../../constants'
 import { ApiProvider, LLMModel } from '../../types/llm/model'
 import {
 	LLMOptions,
@@ -14,7 +14,6 @@ import { InfioSettings } from '../../types/settings'
 import { AnthropicProvider } from './anthropic'
 import { GeminiProvider } from './gemini'
 import { GroqProvider } from './groq'
-import { InfioProvider } from './infio'
 import { OllamaProvider } from './ollama'
 import { OpenAIAuthenticatedProvider } from './openai'
 import { OpenAICompatibleProvider } from './openai-compatible'
@@ -40,7 +39,7 @@ class LLMManager implements LLMManagerInterface {
 	private googleProvider: GeminiProvider
 	private groqProvider: GroqProvider
 	private grokProvider: OpenAICompatibleProvider
-	private infioProvider: InfioProvider
+	private infioProvider: OpenAICompatibleProvider
 	private openrouterProvider: OpenAICompatibleProvider
 	private siliconflowProvider: OpenAICompatibleProvider
 	private alibabaQwenProvider: OpenAICompatibleProvider
@@ -49,7 +48,10 @@ class LLMManager implements LLMManagerInterface {
 	private isInfioEnabled: boolean

 	constructor(settings: InfioSettings) {
-		this.infioProvider = new InfioProvider(settings.infioProvider.apiKey)
+		this.infioProvider = new OpenAICompatibleProvider(
+			settings.infioProvider.apiKey,
+			INFIO_BASE_URL
+		)
 		this.openrouterProvider = new OpenAICompatibleProvider(
 			settings.openrouterProvider.apiKey,
 			settings.openrouterProvider.baseUrl && settings.openrouterProvider.useCustomUrl ?
@@ -93,14 +95,14 @@ class LLMManager implements LLMManagerInterface {
 		request: LLMRequestNonStreaming,
 		options?: LLMOptions,
 	): Promise<LLMResponseNonStreaming> {
-		if (this.isInfioEnabled) {
-			return await this.infioProvider.generateResponse(
-				model,
-				request,
-			)
-		}
-		// use custom provider
+		console.log("model", model)
 		switch (model.provider) {
+			case ApiProvider.Infio:
+				return await this.infioProvider.generateResponse(
+					model,
+					request,
+					options,
+				)
 			case ApiProvider.OpenRouter:
 				return await this.openrouterProvider.generateResponse(
 					model,
@@ -169,11 +171,9 @@ class LLMManager implements LLMManagerInterface {
 		request: LLMRequestStreaming,
 		options?: LLMOptions,
 	): Promise<AsyncIterable<LLMResponseStreaming>> {
-		if (this.isInfioEnabled) {
-			return await this.infioProvider.streamResponse(model, request)
-		}
-		// use custom provider
 		switch (model.provider) {
+			case ApiProvider.Infio:
+				return await this.infioProvider.streamResponse(model, request, options)
 			case ApiProvider.OpenRouter:
 				return await this.openrouterProvider.streamResponse(model, request, options)
 			case ApiProvider.SiliconFlow:
@@ -171,6 +171,7 @@ const CustomProviderSettings: React.FC<CustomProviderSettingsProps> = ({ plugin,
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.chatModel")}
+				settings={settings}
 				provider={settings.chatModelProvider || currProvider}
 				modelId={settings.chatModelId}
 				updateModel={updateChatModelId}
@@ -178,6 +179,7 @@ const CustomProviderSettings: React.FC<CustomProviderSettingsProps> = ({ plugin,
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.autocompleteModel")}
+				settings={settings}
 				provider={settings.applyModelProvider || currProvider}
 				modelId={settings.applyModelId}
 				updateModel={updateApplyModelId}
@@ -185,6 +187,7 @@ const CustomProviderSettings: React.FC<CustomProviderSettingsProps> = ({ plugin,
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.embeddingModel")}
+				settings={settings}
 				provider={settings.embeddingModelProvider || ApiProvider.Google}
 				modelId={settings.embeddingModelId}
 				isEmbedding={true}
@@ -3,6 +3,7 @@ import Fuse, { FuseResult } from "fuse.js";
 import React, { useEffect, useMemo, useRef, useState } from "react";

 import { ApiProvider } from "../../types/llm/model";
+import { InfioSettings } from "../../types/settings";
 // import { PROVIDERS } from '../constants';
 import { GetAllProviders, GetEmbeddingProviderModelIds, GetEmbeddingProviders, GetProviderModelIds } from "../../utils/api";

@@ -149,6 +150,7 @@ export type ComboBoxComponentProps = {
 	name: string;
 	provider: ApiProvider;
 	modelId: string;
+	settings?: InfioSettings | null;
 	isEmbedding?: boolean,
 	updateModel: (provider: ApiProvider, modelId: string) => void;
 };
@@ -157,6 +159,7 @@ export const ComboBoxComponent: React.FC<ComboBoxComponentProps> = ({
 	name,
 	provider,
 	modelId,
+	settings = null,
 	isEmbedding = false,
 	updateModel,
 }) => {
@@ -177,7 +180,7 @@ export const ComboBoxComponent: React.FC<ComboBoxComponentProps> = ({
 		const fetchModelIds = async () => {
 			const ids = isEmbedding
 				? GetEmbeddingProviderModelIds(modelProvider)
-				: await GetProviderModelIds(modelProvider);
+				: await GetProviderModelIds(modelProvider, settings);
 			setModelIds(ids);
 		};
src/utils/api.ts (331 lines changed)
@@ -1,5 +1,6 @@
-import { OPENROUTER_BASE_URL } from '../constants'
+import { INFIO_BASE_URL, OPENROUTER_BASE_URL } from '../constants'
 import { ApiProvider } from '../types/llm/model'
+import { InfioSettings } from '../types/settings'

 export interface ModelInfo {
 	maxTokens?: number
@@ -14,6 +15,15 @@ export interface ModelInfo {
 	description?: string
 	reasoningEffort?: string,
 	thinking?: boolean
+	maxThinkingTokens?: number
+	supportsReasoningBudget?: boolean
+	requiredReasoningBudget?: boolean
+	tiers?: readonly {
+		readonly contextWindow: number,
+		readonly inputPrice: number,
+		readonly outputPrice: number,
+		readonly cacheReadsPrice: number,
+	}[]
 }

 export interface EmbeddingModelInfo {
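
Note: the new tiers field expresses price breakpoints by prompt size. One possible way a caller could resolve the effective per-million-token price (a sketch assuming tiers are ordered by ascending contextWindow with Infinity as the catch-all; this helper is not part of the commit):

	function priceForPrompt(info: ModelInfo, promptTokens: number) {
		// No tiers declared: use the flat prices.
		if (!info.tiers || info.tiers.length === 0) {
			return { inputPrice: info.inputPrice, outputPrice: info.outputPrice }
		}
		// Pick the first tier whose contextWindow covers this prompt.
		const tier = info.tiers.find((t) => promptTokens <= t.contextWindow) ?? info.tiers[info.tiers.length - 1]
		return { inputPrice: tier.inputPrice, outputPrice: tier.outputPrice }
	}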
@@ -21,89 +31,13 @@
 	description?: string
 }

-// Infio
-// https://infio.app/pricing
-export type InfioModelId = keyof typeof infioModels
-export const infioDefaultModelId: InfioModelId = "deepseek-chat"
-export const infioModels = {
-	"deepseek-chat": {
-		maxTokens: 8_000,
-		contextWindow: 64_000,
-		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 0.28,
-		cacheWritesPrice: 0.14,
-		cacheReadsPrice: 0.014,
-	},
-	"deepseek-reasoner": {
-		maxTokens: 8_000,
-		contextWindow: 64_000,
-		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 2.19,
-		cacheWritesPrice: 0.55,
-		cacheReadsPrice: 0.14,
-	},
-	"o3-mini": {
-		maxTokens: 100_000,
-		contextWindow: 200_000,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 1.1,
-		outputPrice: 4.4,
-	},
-	// don't support tool use yet
-	o1: {
-		maxTokens: 100_000,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-preview": {
-		maxTokens: 32_768,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-mini": {
-		maxTokens: 65_536,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 1.1,
-		outputPrice: 4.4,
-	},
-	"gpt-4o": {
-		maxTokens: 4_096,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.5,
-		outputPrice: 10,
-	},
-	"gpt-4o-mini": {
-		maxTokens: 16_384,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-	},
-} as const satisfies Record<string, ModelInfo>
-
 // Anthropic
 // https://docs.anthropic.com/en/docs/about-claude/models
 export type AnthropicModelId = keyof typeof anthropicModels
-export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
+export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514"
 export const anthropicModels = {
-	"claude-3-7-sonnet-20250219:thinking": {
-		maxTokens: 128_000,
+	"claude-sonnet-4-20250514": {
+		maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -112,10 +46,35 @@
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
-		thinking: true,
+		supportsReasoningBudget: true,
+	},
+	"claude-opus-4-20250514": {
+		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsComputerUse: true,
+		supportsPromptCache: true,
+		inputPrice: 15.0, // $15 per million input tokens
+		outputPrice: 75.0, // $75 per million output tokens
+		cacheWritesPrice: 18.75, // $18.75 per million tokens
+		cacheReadsPrice: 1.5, // $1.50 per million tokens
+		supportsReasoningBudget: true,
+	},
+	"claude-3-7-sonnet-20250219:thinking": {
+		maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k.
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsComputerUse: true,
+		supportsPromptCache: true,
+		inputPrice: 3.0, // $3 per million input tokens
+		outputPrice: 15.0, // $15 per million output tokens
+		cacheWritesPrice: 3.75, // $3.75 per million tokens
+		cacheReadsPrice: 0.3, // $0.30 per million tokens
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 	},
 	"claude-3-7-sonnet-20250219": {
-		maxTokens: 8192,
+		maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here.
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -124,7 +83,6 @@
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
-		thinking: false,
 	},
 	"claude-3-5-sonnet-20241022": {
 		maxTokens: 8192,
@@ -167,10 +125,71 @@
 		cacheWritesPrice: 0.3,
 		cacheReadsPrice: 0.03,
 	},
-} as const satisfies Record<string, ModelInfo>
+} as const satisfies Record<string, ModelInfo> // as const assertion makes the object
+
+// Infio
+export const infioDefaultModelId = "deepseek/deepseek-v3" // will always exist
+export const infioDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 65_536,
+	supportsImages: false,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 0.272,
+	outputPrice: 1.088,
+	cacheWritesPrice: 0.14,
+	cacheReadsPrice: 0.014,
+}
+let infioModelsCache: Record<string, ModelInfo> | null = null;
+
+async function fetchInfioModels(apiKey?: string): Promise<Record<string, ModelInfo>> {
+	if (infioModelsCache) {
+		return infioModelsCache;
+	}
+
+	try {
+		const headers: Record<string, string> = {
+			'Content-Type': 'application/json'
+		};
+
+		// Add an Authorization header in Bearer format when an API key is available
+		if (apiKey) {
+			headers['Authorization'] = `Bearer ${apiKey}`;
+		}
+
+		const response = await fetch(INFIO_BASE_URL + "/model_group/info", {
+			method: 'GET',
+			headers: headers
+		});
+		const data = await response.json();
+		const models: Record<string, ModelInfo> = {};
+		if (data?.data) {
+			for (const model of data.data) {
+				models[model.model_group] = {
+					maxTokens: model.max_output_tokens,
+					contextWindow: model.max_input_tokens,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: model.input_cost_per_token ? model.input_cost_per_token * 1000000 : 0,
+					outputPrice: model.output_cost_per_token ? model.output_cost_per_token * 1000000 : 0,
+				};
+			}
+		}
+
+		infioModelsCache = models;
+		return models;
+	} catch (error) {
+		console.error('Failed to fetch Infio models:', error);
+		// On error, fall back to the default model
+		return {
+			[infioDefaultModelId]: infioDefaultModelInfo
+		};
+	}
+}
+
 // OpenRouter
 // https://openrouter.ai/models?order=newest&supported_parameters=tools
-export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet" // will always exist in openRouterModels
+export const openRouterDefaultModelId = "anthropic/claude-sonnet-4" // will always exist in openRouterModels
 export const openRouterDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -222,8 +241,31 @@ async function fetchOpenRouterModels(): Promise<Record<string, ModelInfo>> {
 // Gemini
 // https://ai.google.dev/gemini-api/docs/models/gemini
 export type GeminiModelId = keyof typeof geminiModels
-export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-04-17"
+export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-05-20"
 export const geminiModels = {
+	"gemini-2.5-flash-preview-05-20:thinking": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.15,
+		outputPrice: 3.5,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
+	},
+	"gemini-2.5-flash-preview-05-20": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.15,
+		outputPrice: 0.6,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+	},
 	"gemini-2.5-flash-preview-04-17:thinking": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
@@ -231,8 +273,9 @@
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 3.5,
-		thinking: true,
-		// maxThinkingTokens: 24_576,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 	},
 	"gemini-2.5-flash-preview-04-17": {
 		maxTokens: 65_535,
@@ -241,7 +284,6 @@
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		thinking: false,
 	},
 	"gemini-2.5-pro-exp-03-25": {
 		maxTokens: 65_535,
@@ -260,6 +302,44 @@
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
+	},
+	"gemini-2.5-pro-preview-05-06": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
+		outputPrice: 15,
+		cacheReadsPrice: 0.625,
+		cacheWritesPrice: 4.5,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
 	},
 	"gemini-2.0-flash-001": {
 		maxTokens: 8192,
@@ -315,9 +395,25 @@
 		maxTokens: 8192,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
+		outputPrice: 0.6,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+		tiers: [
+			{
+				contextWindow: 128_000,
+				inputPrice: 0.075,
+				outputPrice: 0.3,
+				cacheReadsPrice: 0.01875,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 0.15,
+				outputPrice: 0.6,
+				cacheReadsPrice: 0.0375,
+			},
+		],
 	},
 	"gemini-1.5-flash-exp-0827": {
 		maxTokens: 8192,
@@ -360,6 +456,7 @@
 		outputPrice: 0,
 	},
 } as const satisfies Record<string, ModelInfo>
+
 export const geminiEmbeddingModels = {
 	"text-embedding-004": {
 		dimensions: 768,
@@ -482,8 +579,8 @@ export const deepSeekModels = {
 		contextWindow: 64_000,
 		supportsImages: false,
 		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 0.28,
+		inputPrice: 0.272, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
+		outputPrice: 1.088,
 		cacheWritesPrice: 0.14,
 		cacheReadsPrice: 0.014,
 	},
@@ -1505,10 +1602,46 @@ export const GetEmbeddingProviders = (): ApiProvider[] => {
 }

 // Get all models for a provider
-export const GetProviderModels = async (provider: ApiProvider): Promise<Record<string, ModelInfo>> => {
+export const GetProviderModels = async (provider: ApiProvider, settings?: InfioSettings): Promise<Record<string, ModelInfo>> => {
 	switch (provider) {
-		case ApiProvider.Infio:
-			return infioModels
+		case ApiProvider.Infio: {
+			const apiKey = settings?.infioProvider?.apiKey
+			return await fetchInfioModels(apiKey)
+		}
+		case ApiProvider.OpenRouter:
+			return await fetchOpenRouterModels()
+		case ApiProvider.OpenAI:
+			return openAiNativeModels
+		case ApiProvider.AlibabaQwen:
+			return qwenModels
+		case ApiProvider.SiliconFlow:
+			return siliconFlowModels
+		case ApiProvider.Anthropic:
+			return anthropicModels
+		case ApiProvider.Deepseek:
+			return deepSeekModels
+		case ApiProvider.Google:
+			return geminiModels
+		case ApiProvider.Groq:
+			return groqModels
+		case ApiProvider.Grok:
+			return grokModels
+		case ApiProvider.Ollama:
+			return {}
+		case ApiProvider.OpenAICompatible:
+			return {}
+		default:
+			return {}
+	}
+}
+
+// Get all models for a provider with settings (needed for providers that require API keys)
+export const GetProviderModelsWithSettings = async (provider: ApiProvider, settings?: InfioSettings): Promise<Record<string, ModelInfo>> => {
+	switch (provider) {
+		case ApiProvider.Infio: {
+			const apiKey = settings?.infioProvider?.apiKey
+			return await fetchInfioModels(apiKey)
+		}
 		case ApiProvider.OpenRouter:
 			return await fetchOpenRouterModels()
 		case ApiProvider.OpenAI:
@@ -1537,8 +1670,8 @@ export const GetProviderModels = async (provider: ApiProvider): Promise<Record<s
 }

 // Get all model ids for a provider
-export const GetProviderModelIds = async (provider: ApiProvider): Promise<string[]> => {
-	const models = await GetProviderModels(provider)
+export const GetProviderModelIds = async (provider: ApiProvider, settings?: InfioSettings): Promise<string[]> => {
+	const models = await GetProviderModels(provider, settings)
 	return Object.keys(models)
 }
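
Note: callers that list Infio models now have to pass settings so the API key can be forwarded to the models endpoint. A usage sketch (assumes an InfioSettings value is available; illustrative, not part of the diff):

	const models = await GetProviderModels(ApiProvider.Infio, settings) // fetched from INFIO_BASE_URL + "/model_group/info", then cached
	const ids = await GetProviderModelIds(ApiProvider.Infio, settings)  // e.g. ["deepseek/deepseek-v3", ...]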