update LLM models

duanfuxiang 2025-05-29 22:40:20 +08:00
parent 48b95ea416
commit 120c442274
9 changed files with 1014 additions and 536 deletions

View File

@@ -56,6 +56,7 @@
     "@codemirror/lang-markdown": "^6.3.2",
     "@codemirror/merge": "^6.10.0",
     "@electric-sql/pglite": "0.2.14",
+    "@google/genai": "^1.2.0",
     "@google/generative-ai": "^0.21.0",
     "@langchain/core": "^0.3.26",
     "@lexical/clipboard": "^0.17.1",

pnpm-lock.yaml (generated, 765 changed lines)

File diff suppressed because it is too large.

View File

@@ -33,7 +33,7 @@ export const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'
 export const GROK_BASE_URL = 'https://api.x.ai/v1'
 export const SILICONFLOW_BASE_URL = 'https://api.siliconflow.cn/v1'
 export const ALIBABA_QWEN_BASE_URL = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
-export const INFIO_BASE_URL = 'https://api.infio.com/api/raw_message'
+export const INFIO_BASE_URL = 'https://api.infio.app'
 export const JINA_BASE_URL = 'https://r.jina.ai'
 export const SERPER_BASE_URL = 'https://serpapi.com/search'
 // Pricing in dollars per million tokens

View File

@@ -1,11 +1,11 @@
 import {
 	Content,
-	EnhancedGenerateContentResponse,
-	GenerateContentResult,
-	GenerateContentStreamResult,
-	GoogleGenerativeAI,
+	GoogleGenAI,
 	Part,
-} from '@google/generative-ai'
+	type GenerateContentConfig,
+	type GenerateContentParameters,
+	type GenerateContentResponse,
+} from "@google/genai"

 import { LLMModel } from '../../types/llm/model'
 import {
@@ -18,6 +18,12 @@ import {
 	LLMResponseNonStreaming,
 	LLMResponseStreaming,
 } from '../../types/llm/response'
+import {
+	GeminiModelId,
+	ModelInfo,
+	geminiDefaultModelId,
+	geminiModels
+} from "../../utils/api"
 import { parseImageDataUrl } from '../../utils/image'
 import { BaseLLMProvider } from './base'
@@ -34,12 +40,41 @@ import {
  * issues are resolved.
  */
 export class GeminiProvider implements BaseLLMProvider {
-	private client: GoogleGenerativeAI
+	private client: GoogleGenAI
 	private apiKey: string
+	private baseUrl: string

-	constructor(apiKey: string) {
+	constructor(apiKey: string, baseUrl?: string) {
 		this.apiKey = apiKey
-		this.client = new GoogleGenerativeAI(apiKey)
+		this.baseUrl = baseUrl
+		this.client = new GoogleGenAI({ apiKey })
+	}
+
+	getModel(modelId: string) {
+		let id = modelId
+		let info: ModelInfo = geminiModels[id as GeminiModelId]
+
+		if (id?.endsWith(":thinking")) {
+			id = id.slice(0, -":thinking".length)
+
+			if (geminiModels[id as GeminiModelId]) {
+				info = geminiModels[id as GeminiModelId]
+
+				return {
+					id,
+					info,
+					thinkingConfig: undefined,
+					maxOutputTokens: info.maxTokens ?? undefined,
+				}
+			}
+		}
+
+		if (!info) {
+			id = geminiDefaultModelId
+			info = geminiModels[geminiDefaultModelId]
+		}
+
+		return { id, info }
 	}

 	async generateResponse(
@@ -53,6 +88,8 @@ export class GeminiProvider implements BaseLLMProvider {
 			)
 		}

+		const { id: modelName, thinkingConfig, maxOutputTokens, info } = this.getModel(model.modelId)
+
 		const systemMessages = request.messages.filter((m) => m.role === 'system')
 		const systemInstruction: string | undefined =
 			systemMessages.length > 0
@@ -60,30 +97,26 @@
 				: undefined

 		try {
-			const model = this.client.getGenerativeModel({
-				model: request.model,
-				generationConfig: {
-					maxOutputTokens: request.max_tokens,
-					temperature: request.temperature,
-					topP: request.top_p,
-					presencePenalty: request.presence_penalty,
-					frequencyPenalty: request.frequency_penalty,
-				},
-				systemInstruction: systemInstruction,
-			})
-
-			const result = await model.generateContent(
-				{
-					systemInstruction: systemInstruction,
-					contents: request.messages
-						.map((message) => GeminiProvider.parseRequestMessage(message))
-						.filter((m): m is Content => m !== null),
-				},
-				{
-					signal: options?.signal,
-				},
-			)
+			const config: GenerateContentConfig = {
+				systemInstruction,
+				httpOptions: this.baseUrl ? { baseUrl: this.baseUrl } : undefined,
+				thinkingConfig,
+				maxOutputTokens: maxOutputTokens ?? request.max_tokens,
+				temperature: request.temperature ?? 0,
+				topP: request.top_p ?? 1,
+				presencePenalty: request.presence_penalty ?? 0,
+				frequencyPenalty: request.frequency_penalty ?? 0,
+			}
+			const params: GenerateContentParameters = {
+				model: modelName,
+				contents: request.messages
+					.map((message) => GeminiProvider.parseRequestMessage(message))
+					.filter((m): m is Content => m !== null),
+				config,
+			}
+			const result = await this.client.models.generateContent(params)
 			const messageId = crypto.randomUUID() // Gemini does not return a message id
 			return GeminiProvider.parseNonStreamingResponse(
 				result,
@@ -115,6 +148,7 @@
 				`Gemini API key is missing. Please set it in settings menu.`,
 			)
 		}
+		const { id: modelName, thinkingConfig, maxOutputTokens, info } = this.getModel(model.modelId)

 		const systemMessages = request.messages.filter((m) => m.role === 'system')
 		const systemInstruction: string | undefined =
@@ -123,30 +157,25 @@
 				: undefined

 		try {
-			const model = this.client.getGenerativeModel({
-				model: request.model,
-				generationConfig: {
-					maxOutputTokens: request.max_tokens,
-					temperature: request.temperature,
-					topP: request.top_p,
-					presencePenalty: request.presence_penalty,
-					frequencyPenalty: request.frequency_penalty,
-				},
-				systemInstruction: systemInstruction,
-			})
-
-			const stream = await model.generateContentStream(
-				{
-					systemInstruction: systemInstruction,
-					contents: request.messages
-						.map((message) => GeminiProvider.parseRequestMessage(message))
-						.filter((m): m is Content => m !== null),
-				},
-				{
-					signal: options?.signal,
-				},
-			)
+			const config: GenerateContentConfig = {
+				systemInstruction,
+				httpOptions: this.baseUrl ? { baseUrl: this.baseUrl } : undefined,
+				thinkingConfig,
+				maxOutputTokens: maxOutputTokens ?? request.max_tokens,
+				temperature: request.temperature ?? 0,
+				topP: request.top_p ?? 1,
+				presencePenalty: request.presence_penalty ?? 0,
+				frequencyPenalty: request.frequency_penalty ?? 0,
+			}
+			const params: GenerateContentParameters = {
+				model: modelName,
+				contents: request.messages
+					.map((message) => GeminiProvider.parseRequestMessage(message))
+					.filter((m): m is Content => m !== null),
+				config,
+			}
+			const stream = await this.client.models.generateContentStream(params)
 			const messageId = crypto.randomUUID() // Gemini does not return a message id
 			return this.streamResponseGenerator(stream, request.model, messageId)
 		} catch (error) {
@@ -165,11 +194,11 @@
 	}

 	private async *streamResponseGenerator(
-		stream: GenerateContentStreamResult,
+		stream: AsyncGenerator<GenerateContentResponse>,
 		model: string,
 		messageId: string,
 	): AsyncIterable<LLMResponseStreaming> {
-		for await (const chunk of stream.stream) {
+		for await (const chunk of stream) {
 			yield GeminiProvider.parseStreamingResponseChunk(chunk, model, messageId)
 		}
 	}
@@ -215,7 +244,7 @@
 	}

 	static parseNonStreamingResponse(
-		response: GenerateContentResult,
+		response: GenerateContentResponse,
 		model: string,
 		messageId: string,
 	): LLMResponseNonStreaming {
@@ -224,9 +253,9 @@
 			choices: [
 				{
 					finish_reason:
-						response.response.candidates?.[0]?.finishReason ?? null,
+						response.candidates?.[0]?.finishReason ?? null,
 					message: {
-						content: response.response.text(),
+						content: response.candidates?.[0]?.content?.parts?.[0]?.text ?? '',
 						role: 'assistant',
 					},
 				},
@@ -234,29 +263,32 @@
 			created: Date.now(),
 			model: model,
 			object: 'chat.completion',
-			usage: response.response.usageMetadata
+			usage: response.usageMetadata
 				? {
-					prompt_tokens: response.response.usageMetadata.promptTokenCount,
+					prompt_tokens: response.usageMetadata.promptTokenCount,
 					completion_tokens:
-						response.response.usageMetadata.candidatesTokenCount,
-					total_tokens: response.response.usageMetadata.totalTokenCount,
+						response.usageMetadata.candidatesTokenCount,
+					total_tokens: response.usageMetadata.totalTokenCount,
 				}
 				: undefined,
 		}
 	}

 	static parseStreamingResponseChunk(
-		chunk: EnhancedGenerateContentResponse,
+		chunk: GenerateContentResponse,
 		model: string,
 		messageId: string,
 	): LLMResponseStreaming {
+		const firstCandidate = chunk.candidates?.[0]
+		const textContent = firstCandidate?.content?.parts?.[0]?.text || ''
+
 		return {
 			id: messageId,
 			choices: [
 				{
-					finish_reason: chunk.candidates?.[0]?.finishReason ?? null,
+					finish_reason: firstCandidate?.finishReason ?? null,
 					delta: {
-						content: chunk.text(),
+						content: textContent,
 					},
 				},
 			],
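For reference, the rewritten provider drives everything through client.models in the new SDK. Below is a minimal standalone sketch of that call path; the API key, model id, and prompt are placeholders, and thinkingConfig is omitted, as it is for non-":thinking" models.

import { GoogleGenAI, type GenerateContentParameters } from "@google/genai"

async function demo() {
	// Placeholder key and model id; the real values come from plugin settings.
	const client = new GoogleGenAI({ apiKey: "YOUR_API_KEY" })
	const params: GenerateContentParameters = {
		model: "gemini-2.5-flash-preview-05-20",
		contents: [{ role: "user", parts: [{ text: "Hello" }] }],
		config: { temperature: 0, maxOutputTokens: 1024 },
	}

	// Non-streaming call, as in generateResponse above.
	const result = await client.models.generateContent(params)
	console.log(result.candidates?.[0]?.content?.parts?.[0]?.text ?? "")

	// Streaming call, as in streamResponse above; each chunk is a GenerateContentResponse.
	const stream = await client.models.generateContentStream(params)
	for await (const chunk of stream) {
		process.stdout.write(chunk.candidates?.[0]?.content?.parts?.[0]?.text ?? "")
	}
}

Model ids ending in ":thinking" are resolved by getModel(), which strips the suffix and maps onto the underlying catalog entry before the request is built.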

View File

@@ -1,251 +0,0 @@
import OpenAI from 'openai'
import {
ChatCompletion,
ChatCompletionChunk,
} from 'openai/resources/chat/completions'
import { INFIO_BASE_URL } from '../../constants'
import { LLMModel } from '../../types/llm/model'
import {
LLMRequestNonStreaming,
LLMRequestStreaming,
RequestMessage
} from '../../types/llm/request'
import {
LLMResponseNonStreaming,
LLMResponseStreaming,
} from '../../types/llm/response'
import { BaseLLMProvider } from './base'
import {
LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException,
} from './exception'
export type RangeFilter = {
gte?: number;
lte?: number;
}
export type ChunkFilter = {
field: string;
match_all?: string[];
range?: RangeFilter;
}
/**
* Interface for making requests to the Infio API
*/
export type InfioRequest = {
/** Required: The content of the user message to attach to the topic and then generate an assistant message in response to */
messages: RequestMessage[];
// /** Required: The ID of the topic to attach the message to */
// topic_id: string;
/** Optional: URLs to include */
links?: string[];
/** Optional: Files to include */
files?: string[];
/** Optional: Whether to highlight results in chunk_html. Default is true */
highlight_results?: boolean;
/** Optional: Delimiters for highlighting citations. Default is [".", "!", "?", "\n", "\t", ","] */
highlight_delimiters?: string[];
/** Optional: Search type - "semantic", "fulltext", or "hybrid". Default is "hybrid" */
search_type?: string;
/** Optional: Filters for chunk filtering */
filters?: ChunkFilter;
/** Optional: Whether to use web search API. Default is false */
use_web_search?: boolean;
/** Optional: LLM model to use */
llm_model?: string;
/** Optional: Force source */
force_source?: string;
/** Optional: Whether completion should come before chunks in stream. Default is false */
completion_first?: boolean;
/** Optional: Whether to stream the response. Default is true */
stream_response?: boolean;
/** Optional: Sampling temperature between 0 and 2. Default is 0.5 */
temperature?: number;
/** Optional: Frequency penalty between -2.0 and 2.0. Default is 0.7 */
frequency_penalty?: number;
/** Optional: Presence penalty between -2.0 and 2.0. Default is 0.7 */
presence_penalty?: number;
/** Optional: Maximum tokens to generate */
max_tokens?: number;
/** Optional: Stop tokens (up to 4 sequences) */
stop_tokens?: string[];
}
export class InfioProvider implements BaseLLMProvider {
// private adapter: OpenAIMessageAdapter
// private client: OpenAI
private apiKey: string
private baseUrl: string
constructor(apiKey: string) {
// this.client = new OpenAI({ apiKey, dangerouslyAllowBrowser: true })
// this.adapter = new OpenAIMessageAdapter()
this.apiKey = apiKey
this.baseUrl = INFIO_BASE_URL
}
async generateResponse(
model: LLMModel,
request: LLMRequestNonStreaming,
// options?: LLMOptions,
): Promise<LLMResponseNonStreaming> {
if (!this.apiKey) {
throw new LLMAPIKeyNotSetException(
'OpenAI API key is missing. Please set it in settings menu.',
)
}
try {
const req: InfioRequest = {
messages: request.messages,
stream_response: false,
temperature: request.temperature,
frequency_penalty: request.frequency_penalty,
presence_penalty: request.presence_penalty,
max_tokens: request.max_tokens,
}
const req_options = {
method: 'POST',
headers: {
Authorization: this.apiKey,
"TR-Dataset": "74aaec22-0cf0-4cba-80a5-ae5c0518344e",
'Content-Type': 'application/json'
},
body: JSON.stringify(req)
};
const response = await fetch(this.baseUrl, req_options);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json() as ChatCompletion;
return InfioProvider.parseNonStreamingResponse(data);
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) {
throw new LLMAPIKeyInvalidException(
'OpenAI API key is invalid. Please update it in settings menu.',
)
}
throw error
}
}
async streamResponse(
model: LLMModel,
request: LLMRequestStreaming,
): Promise<AsyncIterable<LLMResponseStreaming>> {
if (!this.apiKey) {
throw new LLMAPIKeyNotSetException(
'OpenAI API key is missing. Please set it in settings menu.',
)
}
try {
const req: InfioRequest = {
llm_model: request.model,
messages: request.messages,
stream_response: true,
temperature: request.temperature,
frequency_penalty: request.frequency_penalty,
presence_penalty: request.presence_penalty,
max_tokens: request.max_tokens,
}
const req_options = {
method: 'POST',
headers: {
Authorization: this.apiKey,
"TR-Dataset": "74aaec22-0cf0-4cba-80a5-ae5c0518344e",
"Content-Type": "application/json"
},
body: JSON.stringify(req)
};
const response = await fetch(this.baseUrl, req_options);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
return {
[Symbol.asyncIterator]: async function* () {
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n').filter(line => line.trim());
for (const line of lines) {
if (line.startsWith('data: ')) {
const jsonData = JSON.parse(line.slice(6)) as ChatCompletionChunk;
if (!jsonData || typeof jsonData !== 'object' || !('choices' in jsonData)) {
throw new Error('Invalid chunk format received');
}
yield InfioProvider.parseStreamingResponseChunk(jsonData);
}
}
}
} finally {
reader.releaseLock();
}
}
};
} catch (error) {
if (error instanceof OpenAI.AuthenticationError) {
throw new LLMAPIKeyInvalidException(
'OpenAI API key is invalid. Please update it in settings menu.',
)
}
throw error
}
}
static parseNonStreamingResponse(
response: ChatCompletion,
): LLMResponseNonStreaming {
return {
id: response.id,
choices: response.choices.map((choice) => ({
finish_reason: choice.finish_reason,
message: {
content: choice.message.content,
role: choice.message.role,
},
})),
created: response.created,
model: response.model,
object: 'chat.completion',
system_fingerprint: response.system_fingerprint,
usage: response.usage,
}
}
static parseStreamingResponseChunk(
chunk: ChatCompletionChunk,
): LLMResponseStreaming {
return {
id: chunk.id,
choices: chunk.choices.map((choice) => ({
finish_reason: choice.finish_reason ?? null,
delta: {
content: choice.delta.content ?? null,
role: choice.delta.role,
},
})),
created: chunk.created,
model: chunk.model,
object: 'chat.completion.chunk',
system_fingerprint: chunk.system_fingerprint,
usage: chunk.usage ?? undefined,
}
}
}

View File

@@ -1,4 +1,4 @@
-import { ALIBABA_QWEN_BASE_URL, DEEPSEEK_BASE_URL, GROK_BASE_URL, OPENROUTER_BASE_URL, SILICONFLOW_BASE_URL } from '../../constants'
+import { ALIBABA_QWEN_BASE_URL, DEEPSEEK_BASE_URL, GROK_BASE_URL, INFIO_BASE_URL, OPENROUTER_BASE_URL, SILICONFLOW_BASE_URL } from '../../constants'
 import { ApiProvider, LLMModel } from '../../types/llm/model'
 import {
 	LLMOptions,
@@ -14,7 +14,6 @@ import { InfioSettings } from '../../types/settings'
 import { AnthropicProvider } from './anthropic'
 import { GeminiProvider } from './gemini'
 import { GroqProvider } from './groq'
-import { InfioProvider } from './infio'
 import { OllamaProvider } from './ollama'
 import { OpenAIAuthenticatedProvider } from './openai'
 import { OpenAICompatibleProvider } from './openai-compatible'
@@ -40,7 +39,7 @@ class LLMManager implements LLMManagerInterface {
 	private googleProvider: GeminiProvider
 	private groqProvider: GroqProvider
 	private grokProvider: OpenAICompatibleProvider
-	private infioProvider: InfioProvider
+	private infioProvider: OpenAICompatibleProvider
 	private openrouterProvider: OpenAICompatibleProvider
 	private siliconflowProvider: OpenAICompatibleProvider
 	private alibabaQwenProvider: OpenAICompatibleProvider
@@ -49,7 +48,10 @@ class LLMManager implements LLMManagerInterface {
 	private isInfioEnabled: boolean

 	constructor(settings: InfioSettings) {
-		this.infioProvider = new InfioProvider(settings.infioProvider.apiKey)
+		this.infioProvider = new OpenAICompatibleProvider(
+			settings.infioProvider.apiKey,
+			INFIO_BASE_URL
+		)
 		this.openrouterProvider = new OpenAICompatibleProvider(
 			settings.openrouterProvider.apiKey,
 			settings.openrouterProvider.baseUrl && settings.openrouterProvider.useCustomUrl ?
@@ -93,14 +95,14 @@ class LLMManager implements LLMManagerInterface {
 		request: LLMRequestNonStreaming,
 		options?: LLMOptions,
 	): Promise<LLMResponseNonStreaming> {
-		if (this.isInfioEnabled) {
-			return await this.infioProvider.generateResponse(
-				model,
-				request,
-			)
-		}
-		// use custom provider
+		console.log("model", model)
 		switch (model.provider) {
+			case ApiProvider.Infio:
+				return await this.infioProvider.generateResponse(
+					model,
+					request,
+					options,
+				)
 			case ApiProvider.OpenRouter:
 				return await this.openrouterProvider.generateResponse(
 					model,
@@ -169,11 +171,9 @@
 		request: LLMRequestStreaming,
 		options?: LLMOptions,
 	): Promise<AsyncIterable<LLMResponseStreaming>> {
-		if (this.isInfioEnabled) {
-			return await this.infioProvider.streamResponse(model, request)
-		}
-		// use custom provider
 		switch (model.provider) {
+			case ApiProvider.Infio:
+				return await this.infioProvider.streamResponse(model, request, options)
 			case ApiProvider.OpenRouter:
 				return await this.openrouterProvider.streamResponse(model, request, options)
 			case ApiProvider.SiliconFlow:
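With InfioProvider removed, Infio is wired up as one more OpenAI-compatible endpoint and dispatched per model.provider instead of through the old isInfioEnabled short-circuit. A minimal sketch of that wiring, assuming only the constructor and streamResponse signatures visible above (the API key is a placeholder):

import { INFIO_BASE_URL } from '../../constants'
import { OpenAICompatibleProvider } from './openai-compatible'

// Placeholder key; in the plugin it comes from settings.infioProvider.apiKey.
const infioProvider = new OpenAICompatibleProvider('YOUR_INFIO_API_KEY', INFIO_BASE_URL)

// Dispatch then happens in the switch above, e.g.:
// case ApiProvider.Infio:
//   return await infioProvider.streamResponse(model, request, options)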

View File

@@ -171,6 +171,7 @@ const CustomProviderSettings: React.FC<CustomProviderSettingsProps> = ({ plugin,
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.chatModel")}
+				settings={settings}
 				provider={settings.chatModelProvider || currProvider}
 				modelId={settings.chatModelId}
 				updateModel={updateChatModelId}
@@ -178,6 +179,7 @@
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.autocompleteModel")}
+				settings={settings}
 				provider={settings.applyModelProvider || currProvider}
 				modelId={settings.applyModelId}
 				updateModel={updateApplyModelId}
@@ -185,6 +187,7 @@
 			<div className="infio-llm-setting-divider"></div>
 			<ComboBoxComponent
 				name={t("settings.Models.embeddingModel")}
+				settings={settings}
 				provider={settings.embeddingModelProvider || ApiProvider.Google}
 				modelId={settings.embeddingModelId}
 				isEmbedding={true}

View File

@@ -3,6 +3,7 @@ import Fuse, { FuseResult } from "fuse.js";
 import React, { useEffect, useMemo, useRef, useState } from "react";

 import { ApiProvider } from "../../types/llm/model";
+import { InfioSettings } from "../../types/settings";
 // import { PROVIDERS } from '../constants';
 import { GetAllProviders, GetEmbeddingProviderModelIds, GetEmbeddingProviders, GetProviderModelIds } from "../../utils/api";
@@ -149,6 +150,7 @@ export type ComboBoxComponentProps = {
 	name: string;
 	provider: ApiProvider;
 	modelId: string;
+	settings?: InfioSettings | null;
 	isEmbedding?: boolean,
 	updateModel: (provider: ApiProvider, modelId: string) => void;
 };
@@ -157,6 +159,7 @@
 	name,
 	provider,
 	modelId,
+	settings = null,
 	isEmbedding = false,
 	updateModel,
 }) => {
@@ -177,7 +180,7 @@ export const ComboBoxComponent: React.FC<ComboBoxComponentProps> = ({
 		const fetchModelIds = async () => {
 			const ids = isEmbedding
 				? GetEmbeddingProviderModelIds(modelProvider)
-				: await GetProviderModelIds(modelProvider);
+				: await GetProviderModelIds(modelProvider, settings);
 			setModelIds(ids);
 		};

View File

@@ -1,5 +1,6 @@
-import { OPENROUTER_BASE_URL } from '../constants'
+import { INFIO_BASE_URL, OPENROUTER_BASE_URL } from '../constants'
 import { ApiProvider } from '../types/llm/model'
+import { InfioSettings } from '../types/settings'

 export interface ModelInfo {
 	maxTokens?: number
@@ -14,6 +15,15 @@ export interface ModelInfo {
 	description?: string
 	reasoningEffort?: string,
 	thinking?: boolean
+	maxThinkingTokens?: number
+	supportsReasoningBudget?: boolean
+	requiredReasoningBudget?: boolean
+	tiers?: readonly {
+		readonly contextWindow: number,
+		readonly inputPrice: number,
+		readonly outputPrice: number,
+		readonly cacheReadsPrice: number,
+	}[]
 }

 export interface EmbeddingModelInfo {
@@ -21,89 +31,13 @@
 	description?: string
 }

-// Infio
-// https://infio.app/pricing
-export type InfioModelId = keyof typeof infioModels
-export const infioDefaultModelId: InfioModelId = "deepseek-chat"
-export const infioModels = {
-	"deepseek-chat": {
-		maxTokens: 8_000,
-		contextWindow: 64_000,
-		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 0.28,
-		cacheWritesPrice: 0.14,
-		cacheReadsPrice: 0.014,
-	},
-	"deepseek-reasoner": {
-		maxTokens: 8_000,
-		contextWindow: 64_000,
-		supportsImages: false,
-		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 2.19,
-		cacheWritesPrice: 0.55,
-		cacheReadsPrice: 0.14,
-	},
-	"o3-mini": {
-		maxTokens: 100_000,
-		contextWindow: 200_000,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 1.1,
-		outputPrice: 4.4,
-	},
-	// don't support tool use yet
-	o1: {
-		maxTokens: 100_000,
-		contextWindow: 200_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-preview": {
-		maxTokens: 32_768,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 15,
-		outputPrice: 60,
-	},
-	"o1-mini": {
-		maxTokens: 65_536,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 1.1,
-		outputPrice: 4.4,
-	},
-	"gpt-4o": {
-		maxTokens: 4_096,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.5,
-		outputPrice: 10,
-	},
-	"gpt-4o-mini": {
-		maxTokens: 16_384,
-		contextWindow: 128_000,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0.15,
-		outputPrice: 0.6,
-	},
-} as const satisfies Record<string, ModelInfo>
-
 // Anthropic
 // https://docs.anthropic.com/en/docs/about-claude/models
 export type AnthropicModelId = keyof typeof anthropicModels
-export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
+export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514"
 export const anthropicModels = {
-	"claude-3-7-sonnet-20250219:thinking": {
-		maxTokens: 128_000,
+	"claude-sonnet-4-20250514": {
+		maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -112,10 +46,35 @@ export const anthropicModels = {
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
-		thinking: true,
+		supportsReasoningBudget: true,
+	},
+	"claude-opus-4-20250514": {
+		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsComputerUse: true,
+		supportsPromptCache: true,
+		inputPrice: 15.0, // $15 per million input tokens
+		outputPrice: 75.0, // $75 per million output tokens
+		cacheWritesPrice: 18.75, // $18.75 per million tokens
+		cacheReadsPrice: 1.5, // $1.50 per million tokens
+		supportsReasoningBudget: true,
+	},
+	"claude-3-7-sonnet-20250219:thinking": {
+		maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k.
+		contextWindow: 200_000,
+		supportsImages: true,
+		supportsComputerUse: true,
+		supportsPromptCache: true,
+		inputPrice: 3.0, // $3 per million input tokens
+		outputPrice: 15.0, // $15 per million output tokens
+		cacheWritesPrice: 3.75, // $3.75 per million tokens
+		cacheReadsPrice: 0.3, // $0.30 per million tokens
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 	},
 	"claude-3-7-sonnet-20250219": {
-		maxTokens: 8192,
+		maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here.
 		contextWindow: 200_000,
 		supportsImages: true,
 		supportsComputerUse: true,
@@ -124,7 +83,6 @@ export const anthropicModels = {
 		outputPrice: 15.0, // $15 per million output tokens
 		cacheWritesPrice: 3.75, // $3.75 per million tokens
 		cacheReadsPrice: 0.3, // $0.30 per million tokens
-		thinking: false,
 	},
 	"claude-3-5-sonnet-20241022": {
 		maxTokens: 8192,
@@ -167,10 +125,71 @@ export const anthropicModels = {
 		cacheWritesPrice: 0.3,
 		cacheReadsPrice: 0.03,
 	},
-} as const satisfies Record<string, ModelInfo>
+} as const satisfies Record<string, ModelInfo> // as const assertion makes the object
+
+// Infio
+export const infioDefaultModelId = "deepseek/deepseek-v3" // will always exist
+export const infioDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 65_536,
+	supportsImages: false,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 0.272,
+	outputPrice: 1.088,
+	cacheWritesPrice: 0.14,
+	cacheReadsPrice: 0.014,
+}
+
+let infioModelsCache: Record<string, ModelInfo> | null = null;
+
+async function fetchInfioModels(apiKey?: string): Promise<Record<string, ModelInfo>> {
+	if (infioModelsCache) {
+		return infioModelsCache;
+	}
+	try {
+		const headers: Record<string, string> = {
+			'Content-Type': 'application/json'
+		};
+		// Add an Authorization header in Bearer format when an API key is provided
+		if (apiKey) {
+			headers['Authorization'] = `Bearer ${apiKey}`;
+		}
+		const response = await fetch(INFIO_BASE_URL + "/model_group/info", {
+			method: 'GET',
+			headers: headers
+		});
+		const data = await response.json();
+		const models: Record<string, ModelInfo> = {};
+		if (data?.data) {
+			for (const model of data.data) {
+				models[model.model_group] = {
+					maxTokens: model.max_output_tokens,
+					contextWindow: model.max_input_tokens,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: model.input_cost_per_token ? model.input_cost_per_token * 1000000 : 0,
+					outputPrice: model.output_cost_per_token ? model.output_cost_per_token * 1000000 : 0,
+				};
+			}
+		}
+		infioModelsCache = models;
+		return models;
+	} catch (error) {
+		console.error('Failed to fetch Infio models:', error);
+		// On error, fall back to the default model
+		return {
+			[infioDefaultModelId]: infioDefaultModelInfo
+		};
+	}
+}

 // OpenRouter
 // https://openrouter.ai/models?order=newest&supported_parameters=tools
-export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet" // will always exist in openRouterModels
+export const openRouterDefaultModelId = "anthropic/claude-sonnet-4" // will always exist in openRouterModels
 export const openRouterDefaultModelInfo: ModelInfo = {
 	maxTokens: 8192,
 	contextWindow: 200_000,
@@ -222,8 +241,31 @@
 // Gemini
 // https://ai.google.dev/gemini-api/docs/models/gemini
 export type GeminiModelId = keyof typeof geminiModels
-export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-04-17"
+export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-05-20"
 export const geminiModels = {
+	"gemini-2.5-flash-preview-05-20:thinking": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.15,
+		outputPrice: 3.5,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
+	},
+	"gemini-2.5-flash-preview-05-20": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 0.15,
+		outputPrice: 0.6,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+	},
 	"gemini-2.5-flash-preview-04-17:thinking": {
 		maxTokens: 65_535,
 		contextWindow: 1_048_576,
@@ -231,8 +273,9 @@ export const geminiModels = {
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 3.5,
-		thinking: true,
-		// maxThinkingTokens: 24_576,
+		maxThinkingTokens: 24_576,
+		supportsReasoningBudget: true,
+		requiredReasoningBudget: true,
 	},
 	"gemini-2.5-flash-preview-04-17": {
 		maxTokens: 65_535,
@@ -241,7 +284,6 @@ export const geminiModels = {
 		supportsPromptCache: false,
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		thinking: false,
 	},
 	"gemini-2.5-pro-exp-03-25": {
 		maxTokens: 65_535,
@@ -260,6 +302,44 @@ export const geminiModels = {
 		outputPrice: 15,
 		cacheReadsPrice: 0.625,
 		cacheWritesPrice: 4.5,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
+	},
+	"gemini-2.5-pro-preview-05-06": {
+		maxTokens: 65_535,
+		contextWindow: 1_048_576,
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
+		outputPrice: 15,
+		cacheReadsPrice: 0.625,
+		cacheWritesPrice: 4.5,
+		tiers: [
+			{
+				contextWindow: 200_000,
+				inputPrice: 1.25,
+				outputPrice: 10,
+				cacheReadsPrice: 0.31,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 2.5,
+				outputPrice: 15,
+				cacheReadsPrice: 0.625,
+			},
+		],
 	},
 	"gemini-2.0-flash-001": {
 		maxTokens: 8192,
@@ -315,9 +395,25 @@ export const geminiModels = {
 		maxTokens: 8192,
 		contextWindow: 1_048_576,
 		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 0,
-		outputPrice: 0,
+		supportsPromptCache: true,
+		inputPrice: 0.15, // This is the pricing for prompts above 128k tokens.
+		outputPrice: 0.6,
+		cacheReadsPrice: 0.0375,
+		cacheWritesPrice: 1.0,
+		tiers: [
+			{
+				contextWindow: 128_000,
+				inputPrice: 0.075,
+				outputPrice: 0.3,
+				cacheReadsPrice: 0.01875,
+			},
+			{
+				contextWindow: Infinity,
+				inputPrice: 0.15,
+				outputPrice: 0.6,
+				cacheReadsPrice: 0.0375,
+			},
+		],
 	},
 	"gemini-1.5-flash-exp-0827": {
 		maxTokens: 8192,
@@ -360,6 +456,7 @@ export const geminiModels = {
 		outputPrice: 0,
 	},
 } as const satisfies Record<string, ModelInfo>
+
 export const geminiEmbeddingModels = {
 	"text-embedding-004": {
 		dimensions: 768,
@@ -482,8 +579,8 @@ export const deepSeekModels = {
 		contextWindow: 64_000,
 		supportsImages: false,
 		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
-		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
-		outputPrice: 0.28,
+		inputPrice: 0.272, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
+		outputPrice: 1.088,
 		cacheWritesPrice: 0.14,
 		cacheReadsPrice: 0.014,
 	},
@@ -1505,10 +1602,46 @@ export const GetEmbeddingProviders = (): ApiProvider[] => {
 }

 // Get all models for a provider
-export const GetProviderModels = async (provider: ApiProvider): Promise<Record<string, ModelInfo>> => {
+export const GetProviderModels = async (provider: ApiProvider, settings?: InfioSettings): Promise<Record<string, ModelInfo>> => {
 	switch (provider) {
-		case ApiProvider.Infio:
-			return infioModels
+		case ApiProvider.Infio: {
+			const apiKey = settings?.infioProvider?.apiKey
+			return await fetchInfioModels(apiKey)
+		}
+		case ApiProvider.OpenRouter:
+			return await fetchOpenRouterModels()
+		case ApiProvider.OpenAI:
+			return openAiNativeModels
+		case ApiProvider.AlibabaQwen:
+			return qwenModels
+		case ApiProvider.SiliconFlow:
+			return siliconFlowModels
+		case ApiProvider.Anthropic:
+			return anthropicModels
+		case ApiProvider.Deepseek:
+			return deepSeekModels
+		case ApiProvider.Google:
+			return geminiModels
+		case ApiProvider.Groq:
+			return groqModels
+		case ApiProvider.Grok:
+			return grokModels
+		case ApiProvider.Ollama:
+			return {}
+		case ApiProvider.OpenAICompatible:
+			return {}
+		default:
+			return {}
+	}
+}
+
+// Get all models for a provider with settings (needed for providers that require API keys)
+export const GetProviderModelsWithSettings = async (provider: ApiProvider, settings?: InfioSettings): Promise<Record<string, ModelInfo>> => {
+	switch (provider) {
+		case ApiProvider.Infio: {
+			const apiKey = settings?.infioProvider?.apiKey
+			return await fetchInfioModels(apiKey)
+		}
 		case ApiProvider.OpenRouter:
 			return await fetchOpenRouterModels()
 		case ApiProvider.OpenAI:
@@ -1537,8 +1670,8 @@ export const GetProviderModels = async (provider: ApiProvider): Promise<Record<string, ModelInfo>> => {
 }

 // Get all model ids for a provider
-export const GetProviderModelIds = async (provider: ApiProvider): Promise<string[]> => {
-	const models = await GetProviderModels(provider)
+export const GetProviderModelIds = async (provider: ApiProvider, settings?: InfioSettings): Promise<string[]> => {
+	const models = await GetProviderModels(provider, settings)
 	return Object.keys(models)
 }
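A short usage sketch of the settings-aware lookup added above; listInfioModels is a hypothetical helper, and the relative import path assumes the caller sits next to this utils module:

import { ApiProvider } from '../types/llm/model'
import { InfioSettings } from '../types/settings'
import { GetProviderModelIds, GetProviderModels } from './api'

// Hypothetical helper: list the Infio catalog available to the configured API key.
async function listInfioModels(settings: InfioSettings): Promise<void> {
	// The first call fetches INFIO_BASE_URL + "/model_group/info"; later calls are served from infioModelsCache.
	const models = await GetProviderModels(ApiProvider.Infio, settings)
	const ids = await GetProviderModelIds(ApiProvider.Infio, settings)
	console.log(`${ids.length} Infio models, e.g.`, ids[0], models[ids[0]])
}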