import { OPENROUTER_BASE_URL } from '../constants'
import { ApiProvider } from '../types/llm/model'

/**
 * Static capabilities and pricing of a chat/completion model.
 *
 * The Anthropic table below annotates its prices as USD per million tokens;
 * the other hand-written tables presumably follow the same unit — verify
 * against each provider's pricing page before relying on it.
 */
export interface ModelInfo {
	maxTokens?: number
	contextWindow?: number
	supportsImages?: boolean
	supportsComputerUse?: boolean
	supportsPromptCache: boolean // this value is hardcoded for now
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
	description?: string
	reasoningEffort?: string
	thinking?: boolean
}

/** Static description of an embedding model: vector size plus optional free-text description. */
export interface EmbeddingModelInfo {
	dimensions: number
	description?: string
}

// Infio
// https://infio.app/pricing
export type InfioModelId = keyof typeof infioModels
export const infioDefaultModelId: InfioModelId = "deepseek-chat"
export const infioModels = {
	"deepseek-chat": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
		outputPrice: 0.28,
		cacheWritesPrice: 0.14,
		cacheReadsPrice: 0.014,
	},
	"deepseek-reasoner": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
		outputPrice: 2.19,
		cacheWritesPrice: 0.55,
		cacheReadsPrice: 0.14,
	},
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
	},
	// don't support tool use yet
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.1,
		outputPrice: 4.4,
	},
	"gpt-4o": {
		maxTokens: 4_096,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.5,
		outputPrice: 10,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>

// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models
export type AnthropicModelId = keyof typeof anthropicModels
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
export const anthropicModels = {
	"claude-3-7-sonnet-20250219:thinking": {
		maxTokens: 128_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
		thinking: true,
	},
	"claude-3-7-sonnet-20250219": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
		thinking: false,
	},
	"claude-3-5-sonnet-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: true,
		supportsComputerUse: true,
		supportsPromptCache: true,
		inputPrice: 3.0, // $3 per million input tokens
		outputPrice: 15.0, // $15 per million output tokens
		cacheWritesPrice: 3.75, // $3.75 per million tokens
		cacheReadsPrice: 0.3, // $0.30 per million tokens
	},
	"claude-3-5-haiku-20241022": {
		maxTokens: 8192,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.0,
		outputPrice: 5.0,
		cacheWritesPrice: 1.25,
		cacheReadsPrice: 0.1,
	},
	"claude-3-opus-20240229": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15.0,
		outputPrice: 75.0,
		cacheWritesPrice: 18.75,
		cacheReadsPrice: 1.5,
	},
	"claude-3-haiku-20240307": {
		maxTokens: 4096,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.25,
		outputPrice: 1.25,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
} as const satisfies Record<string, ModelInfo>

// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet" // will always exist in openRouterModels
export const openRouterDefaultModelInfo: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsImages: true,
	supportsComputerUse: true,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
	description:
		"The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal",
}

/** The fields of one OpenRouter `/models` entry that this module reads. */
interface OpenRouterModelEntry {
	id: string
	description?: string
	context_length?: number
	architecture?: { modality?: string }
	top_provider?: { max_completion_tokens?: number | null }
	pricing?: { prompt?: string | number; completion?: string | number }
}

/**
 * Converts an OpenRouter price (documented as a decimal string in USD per
 * token) to a number per million tokens, the unit used by the static tables.
 * Missing or non-numeric values become 0.
 */
const toPricePerMillion = (price: string | number | undefined): number => {
	const perToken = Number(price ?? 0)
	return Number.isFinite(perToken) ? perToken * 1_000_000 : 0
}

// Module-level memo of the last successful OpenRouter listing; null until one succeeds.
let openRouterModelsCache: Record<string, ModelInfo> | null = null

/**
 * Fetches the OpenRouter model catalogue and maps it to `ModelInfo` records.
 *
 * A successful, non-empty result is cached for later calls. On any failure
 * (network error, non-2xx status, empty or malformed payload) the error is
 * logged and a single-entry map with the default OpenRouter model is returned;
 * nothing is cached, so the next call retries.
 *
 * @returns Map from OpenRouter model id to its `ModelInfo`.
 */
async function fetchOpenRouterModels(): Promise<Record<string, ModelInfo>> {
	if (openRouterModelsCache) {
		return openRouterModelsCache
	}

	try {
		const response = await fetch(OPENROUTER_BASE_URL + "/models")
		if (!response.ok) {
			// Without this check an error body would be parsed as if it were a listing.
			throw new Error(`OpenRouter responded with HTTP ${response.status}`)
		}

		const payload: { data?: OpenRouterModelEntry[] } | null = await response.json()
		const entries = payload?.data
		if (!Array.isArray(entries) || entries.length === 0) {
			// Caching an empty map would hide every OpenRouter model until restart.
			throw new Error('OpenRouter returned no models')
		}

		const models: Record<string, ModelInfo> = {}
		for (const model of entries) {
			models[model.id] = {
				// Fall back to the context length when the provider reports no completion cap.
				maxTokens: model.top_provider?.max_completion_tokens ?? model.context_length,
				contextWindow: model.context_length,
				supportsImages: model.architecture?.modality?.includes("image") ?? false,
				supportsPromptCache: false,
				inputPrice: toPricePerMillion(model.pricing?.prompt),
				outputPrice: toPricePerMillion(model.pricing?.completion),
				description: model.description,
			}
		}

		openRouterModelsCache = models
		return models
	} catch (error) {
		console.error('Failed to fetch OpenRouter models:', error)
		return {
			[openRouterDefaultModelId]: openRouterDefaultModelInfo
		}
	}
}

// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini
export type GeminiModelId = keyof typeof geminiModels
export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-04-17"
export const geminiModels = {
	"gemini-2.5-flash-preview-04-17:thinking": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 3.5,
		thinking: true,
		// maxThinkingTokens: 24_576,
	},
	"gemini-2.5-flash-preview-04-17": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.15,
		outputPrice: 0.6,
		thinking: false,
	},
	"gemini-2.5-pro-exp-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.5-pro-preview-03-25": {
		maxTokens: 65_535,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
		outputPrice: 15,
		cacheReadsPrice: 0.625,
		cacheWritesPrice: 4.5,
	},
	"gemini-2.0-flash-001": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.1,
		outputPrice: 0.4,
		cacheReadsPrice: 0.025,
		cacheWritesPrice: 1.0,
	},
	"gemini-2.0-flash-lite-preview-02-05": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-pro-exp-02-05": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-01-21": {
		maxTokens: 65_536,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-thinking-exp-1219": {
		maxTokens: 8192,
		contextWindow: 32_767,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-2.0-flash-exp": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-002": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-flash-8b-exp-0827": {
		maxTokens: 8192,
		contextWindow: 1_048_576,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-002": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-1.5-pro-exp-0827": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemini-exp-1206": {
		maxTokens: 8192,
		contextWindow: 2_097_152,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>

export const geminiEmbeddingModels = {
	"text-embedding-004": {
		dimensions: 768,
		description: "The text-embedding-004 model can generate advanced embeddings for words, phrases, and sentences. The resulting embeddings can then be used for tasks such as semantic search, text classification, clustering, and more."
	}
} as const satisfies Record<string, EmbeddingModelInfo>

// OpenAI Native
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
export const openAiNativeModels = {
	// don't support tool use yet
	"o3-mini": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "medium",
	},
	"o3-mini-high": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "high",
	},
	"o3-mini-low": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
		reasoningEffort: "low",
	},
	"o1-pro": {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 150,
		outputPrice: 600,
	},
	o1: {
		maxTokens: 100_000,
		contextWindow: 200_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-preview": {
		maxTokens: 32_768,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 15,
		outputPrice: 60,
	},
	"o1-mini": {
		maxTokens: 65_536,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 1.1,
		outputPrice: 4.4,
	},
	"gpt-4.5-preview": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 75,
		outputPrice: 150,
	},
	"gpt-4o": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 2.5,
		outputPrice: 10,
	},
	"gpt-4o-mini": {
		maxTokens: 16_384,
		contextWindow: 128_000,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0.15,
		outputPrice: 0.6,
	},
} as const satisfies Record<string, ModelInfo>

export const openAINativeEmbeddingModels = {
	"text-embedding-3-small": {
		dimensions: 1536,
		description: "Increased performance over 2nd generation ada embedding model"
	},
	"text-embedding-3-large": {
		dimensions: 3072,
		description: "Most capable embedding model for both English and non-English tasks"
	},
	"text-embedding-ada-002": {
		dimensions: 1536,
		description: "Most capable 2nd generation embedding model, replacing 16 first generation models"
	}
} as const satisfies Record<string, EmbeddingModelInfo>

// DeepSeek
// https://api-docs.deepseek.com/quick_start/pricing
export type DeepSeekModelId = keyof typeof deepSeekModels
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekModels = {
	"deepseek-chat": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
		outputPrice: 0.28,
		cacheWritesPrice: 0.14,
		cacheReadsPrice: 0.014,
	},
	"deepseek-reasoner": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
		outputPrice: 2.19,
		cacheWritesPrice: 0.55,
		cacheReadsPrice: 0.14,
	},
} as const satisfies Record<string, ModelInfo>

// Qwen
// https://help.aliyun.com/zh/model-studio/getting-started/
export type QwenModelId = keyof typeof qwenModels
export const qwenDefaultModelId: QwenModelId = "qwen-max-latest"
export const qwenModels = {
	"qwen3-235b-a22b": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
		cacheWritesPrice: 0.002,
		cacheReadsPrice: 0.006,
	},
	"qwen3-32b": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
	},
	"qwen3-30b-a3b": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
	},
	"qwen3-14b": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
	},
	"qwen3-8b": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
	},
	"qwen2.5-coder-32b-instruct": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
		cacheWritesPrice: 0.002,
		cacheReadsPrice: 0.006,
	},
	"qwen2.5-coder-14b-instruct": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.002,
		outputPrice: 0.006,
		cacheWritesPrice: 0.002,
		cacheReadsPrice: 0.006,
	},
	"qwen2.5-coder-7b-instruct": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.001,
		outputPrice: 0.002,
		cacheWritesPrice: 0.001,
		cacheReadsPrice: 0.002,
	},
	"qwen2.5-coder-3b-instruct": {
		maxTokens: 8_192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.0,
		outputPrice: 0.0,
		cacheWritesPrice: 0.0,
		cacheReadsPrice: 0.0,
	},
	"qwen2.5-coder-1.5b-instruct": {
		maxTokens: 8_192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.0,
		outputPrice: 0.0,
		cacheWritesPrice: 0.0,
		cacheReadsPrice: 0.0,
	},
	"qwen2.5-coder-0.5b-instruct": {
		maxTokens: 8_192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.0,
		outputPrice: 0.0,
		cacheWritesPrice: 0.0,
		cacheReadsPrice: 0.0,
	},
	"qwen-coder-plus-latest": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 3.5,
		outputPrice: 7,
		cacheWritesPrice: 3.5,
		cacheReadsPrice: 7,
	},
	"qwen-plus-latest": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.8,
		outputPrice: 2,
		cacheWritesPrice: 0.8,
		cacheReadsPrice: 0.2,
	},
	"qwen-turbo-latest": {
		maxTokens: 1_000_000,
		contextWindow: 1_000_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.8,
		outputPrice: 2,
		cacheWritesPrice: 0.8,
		cacheReadsPrice: 2,
	},
	"qwen-max-latest": {
		maxTokens: 30_720,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.4,
		outputPrice: 9.6,
		cacheWritesPrice: 2.4,
		cacheReadsPrice: 9.6,
	},
	"qwq-plus-latest": {
		maxTokens: 8_192,
		contextWindow: 131_071,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.0,
		outputPrice: 0.0,
		cacheWritesPrice: 0.0,
		cacheReadsPrice: 0.0,
	},
	"qwq-plus": {
		maxTokens: 8_192,
		contextWindow: 131_071,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.0,
		outputPrice: 0.0,
		cacheWritesPrice: 0.0,
		cacheReadsPrice: 0.0,
	},
	"qwen-coder-plus": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 3.5,
		outputPrice: 7,
		cacheWritesPrice: 3.5,
		cacheReadsPrice: 7,
	},
	"qwen-plus": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.8,
		outputPrice: 2,
		cacheWritesPrice: 0.8,
		cacheReadsPrice: 0.2,
	},
	"qwen-turbo": {
		maxTokens: 1_000_000,
		contextWindow: 1_000_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.6,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.6,
	},
	"qwen-max": {
		maxTokens: 30_720,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.4,
		outputPrice: 9.6,
		cacheWritesPrice: 2.4,
		cacheReadsPrice: 9.6,
	},
	"deepseek-v3": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.28,
		cacheWritesPrice: 0.14,
		cacheReadsPrice: 0.014,
	},
	"deepseek-r1": {
		maxTokens: 8_000,
		contextWindow: 64_000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 2.19,
		cacheWritesPrice: 0.55,
		cacheReadsPrice: 0.14,
	},
	"qwen-vl-max": {
		maxTokens: 30_720,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3,
		outputPrice: 9,
		cacheWritesPrice: 3,
		cacheReadsPrice: 9,
	},
	"qwen-vl-max-latest": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 3,
		outputPrice: 9,
		cacheWritesPrice: 3,
		cacheReadsPrice: 9,
	},
	"qwen-vl-plus": {
		maxTokens: 6_000,
		contextWindow: 8_000,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.5,
		outputPrice: 4.5,
		cacheWritesPrice: 1.5,
		cacheReadsPrice: 4.5,
	},
	"qwen-vl-plus-latest": {
		maxTokens: 129_024,
		contextWindow: 131_072,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.5,
		outputPrice: 4.5,
		cacheWritesPrice: 1.5,
		cacheReadsPrice: 4.5,
	},
} as const satisfies Record<string, ModelInfo>

export const qwenEmbeddingModels = {
	"text-embedding-v3": {
		dimensions: 1024,
		description: "支持50+主流语种,包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语等。最大行数20,单行最大处理8,192 Token。支持可选维度:1,024(默认)、768或512。单价:0.0007元/千Token。免费额度:50万Token(有效期180天)。"
	},
	"text-embedding-v2": {
		dimensions: 1536,
		description: "支持多种语言,包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。最大行数25,单行最大处理2,048 Token。"
	},
	"text-embedding-v1": {
		dimensions: 1536,
		description: "支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。"
	},
	"text-embedding-async-v2": {
		dimensions: 1536,
		description: "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。"
	},
	"text-embedding-async-v1": {
		dimensions: 1536,
		description: "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。"
	}
} as const satisfies Record<string, EmbeddingModelInfo>

// bytedance volcengine
// https://api.volcengine.com/api-docs/view/overview

// SiliconFlow
// https://docs.siliconflow.cn/
export type SiliconFlowModelId = keyof typeof siliconFlowModels
export const siliconFlowDefaultModelId: SiliconFlowModelId = "deepseek-ai/DeepSeek-V3"
export const siliconFlowModels = {
	"01-ai/Yi-1.5-9B-Chat-16K": {
		maxTokens: 8192,
		contextWindow: 16_384,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"01-ai/Yi-1.5-34B-Chat-16K": {
		maxTokens: 8192,
		contextWindow: 16_384,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"google/gemma-2-9b-it": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"google/gemma-2-27b-it": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"Pro/google/gemma-2-9b-it": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"meta-llama/Meta-Llama-3.1-8B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"Pro/meta-llama/Meta-Llama-3.1-8B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"meta-llama/Meta-Llama-3.1-70B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"meta-llama/Meta-Llama-3.1-405B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 5.0,
		outputPrice: 10.0,
	},
	"internlm/internlm2_5-20b-chat": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.8,
		outputPrice: 1.6,
	},
	"Qwen/Qwen2.5-72B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"Qwen/Qwen2.5-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"Qwen/Qwen2.5-14B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.6,
		outputPrice: 1.2,
	},
	"Qwen/Qwen2.5-32B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"Qwen/Qwen2.5-Coder-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"Qwen/Qwen2.5-VL-32B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"Qwen/Qwen2.5-VL-72B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"TeleAI/TeleChat2": {
		maxTokens: 4096,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.6,
	},
	"Pro/Qwen/Qwen2.5-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"Qwen/Qwen2.5-72B-Instruct-128K": {
		maxTokens: 8192,
		contextWindow: 128_000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"Qwen/Qwen2-VL-72B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.5,
		outputPrice: 5.0,
	},
	"OpenGVLab/InternVL2-26B": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"Pro/BAAI/bge-m3": {
		maxTokens: 4096,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.6,
	},
	"Pro/OpenGVLab/InternVL2-8B": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"Vendor-A/Qwen/Qwen2.5-72B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"Pro/Qwen/Qwen2-VL-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"LoRA/Qwen/Qwen2.5-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"Pro/Qwen/Qwen2.5-Coder-7B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.4,
		outputPrice: 0.8,
	},
	"LoRA/Qwen/Qwen2.5-72B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"Qwen/Qwen2.5-Coder-32B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"Pro/BAAI/bge-reranker-v2-m3": {
		maxTokens: 4096,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.3,
		outputPrice: 0.6,
	},
	"Qwen/QwQ-32B": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"Qwen/QwQ-32B-Preview": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"AIDC-AI/Marco-o1": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"LoRA/Qwen/Qwen2.5-14B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.6,
		outputPrice: 1.2,
	},
	"LoRA/Qwen/Qwen2.5-32B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 1.0,
		outputPrice: 2.0,
	},
	"meta-llama/Llama-3.3-70B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 2.0,
		outputPrice: 4.0,
	},
	"LoRA/meta-llama/Meta-Llama-3.1-8B-Instruct": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"deepseek-ai/deepseek-vl2": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0.5,
		outputPrice: 1.0,
	},
	"Qwen/QVQ-72B-Preview": {
		maxTokens: 8192,
		contextWindow: 32_768,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 2.5,
		outputPrice: 5.0,
	},
	"deepseek-ai/DeepSeek-V3": {
		maxTokens: 8000,
		contextWindow: 64000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.28,
		cacheWritesPrice: 0.14,
		cacheReadsPrice: 0.014,
	},
	"deepseek-ai/DeepSeek-R1": {
		maxTokens: 8000,
		contextWindow: 64000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 2.19,
		cacheWritesPrice: 0.55,
		cacheReadsPrice: 0.14,
	},
	"Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
		maxTokens: 4096,
		contextWindow: 16_384,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.2,
		cacheWritesPrice: 0.1,
		cacheReadsPrice: 0.01,
	},
	"Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.4,
		cacheWritesPrice: 0.2,
		cacheReadsPrice: 0.02,
	},
	"Pro/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.4,
		cacheWritesPrice: 0.2,
		cacheReadsPrice: 0.02,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.6,
		cacheWritesPrice: 0.3,
		cacheReadsPrice: 0.03,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 1.0,
		cacheWritesPrice: 0.5,
		cacheReadsPrice: 0.05,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 2.0,
		cacheWritesPrice: 1.0,
		cacheReadsPrice: 0.1,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
		maxTokens: 4096,
		contextWindow: 16_384,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.2,
		cacheWritesPrice: 0.1,
		cacheReadsPrice: 0.01,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.4,
		cacheWritesPrice: 0.2,
		cacheReadsPrice: 0.02,
	},
	"deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
		maxTokens: 8000,
		contextWindow: 32_768,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.4,
		cacheWritesPrice: 0.2,
		cacheReadsPrice: 0.02,
	},
	"Pro/deepseek-ai/DeepSeek-R1": {
		maxTokens: 8000,
		contextWindow: 64000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 2.19,
		cacheWritesPrice: 0.55,
		cacheReadsPrice: 0.14,
	},
	"Pro/deepseek-ai/DeepSeek-V3": {
		maxTokens: 8000,
		contextWindow: 64000,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0.28,
		cacheWritesPrice: 0.14,
		cacheReadsPrice: 0.014,
	}
} as const satisfies Record<string, ModelInfo>

export const siliconFlowEmbeddingModels = {
	"BAAI/bge-m3": {
		dimensions: 1024,
		description: "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能:密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言,并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色,在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能"
	},
	"netease-youdao/bce-embedding-base_v1": {
		dimensions: 768,
		description: "bce-embedding-base_v1 是由网易有道开发的双语和跨语言嵌入模型。该模型在中英文语义表示和检索任务中表现出色,尤其擅长跨语言场景。它是为检索增强生成(RAG)系统优化的,可以直接应用于教育、医疗、法律等多个领域。该模型不需要特定指令即可使用,能够高效地生成语义向量,为语义搜索和问答系统提供关键支持"
	},
	"BAAI/bge-large-zh-v1.5": {
		dimensions: 1024,
		description: "BAAI/bge-large-zh-v1.5 是一个大型中文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。该模型在 C-MTEB 基准测试中表现出色,在 31 个数据集上的平均得分为 64.53,在检索、语义相似度、文本对分类等多个任务中都取得了优异成绩。它支持最大 512 个 token 的输入长度,适用于各种中文自然语言处理任务,如文本检索、语义相似度计算等"
	},
	"BAAI/bge-large-en-v1.5": {
		dimensions: 1024,
		description: "BAAI/bge-large-en-v1.5 是一个大型英文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。它在 MTEB 基准测试中取得了优异的表现,在 56 个数据集上的平均得分为 64.23,在检索、聚类、文本对分类等多个任务中表现出色。该模型支持最大 512 个 token 的输入长度,适用于各种自然语言处理任务,如文本检索、语义相似度计算等"
	},
	"Pro/BAAI/bge-m3": {
		dimensions: 1024,
		description: "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能:密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言,并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色,在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能"
	}
} as const satisfies Record<string, EmbeddingModelInfo>

// Groq
// https://console.groq.com/docs/overview
export type GroqModelId = keyof typeof groqModels
export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile"
export const groqModels = {
	"meta-llama/llama-4-scout-17b-16e-instruct": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"meta-llama/llama-4-maverick-17b-128e-instruct": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-qwq-32b": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0, // was missing; every other Groq entry declares both prices
	},
	"llama-3.2-1b-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.1-8b-instant": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"mixtral-8x7b-32768": {
		maxTokens: 4096,
		contextWindow: 32768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.3-70b-versatile": {
		maxTokens: 4096,
		contextWindow: 32768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-guard-3-8b": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama3-70b-8192": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.3-70b-specdec": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama3-8b-8192": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-11b-vision-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"deepseek-r1-distill-llama-70b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-3b-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-2.5-coder-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemma2-9b-it": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"deepseek-r1-distill-qwen-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-2.5-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-90b-vision-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>

// Grok
// https://docs.x.ai/docs/models
export type GrokModelId = keyof typeof grokModels
export const grokDefaultModelId: GrokModelId = "grok-3"
export const grokModels = {
	"grok-3": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-fast": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-mini": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-mini-fast": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-2-latest": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-2": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	}
} as const satisfies Record<string, ModelInfo>

/// helper functions

/** Returns every value of the `ApiProvider` enum. */
export const GetAllProviders = (): ApiProvider[] => {
	return Object.values(ApiProvider)
}

/** Returns the fixed list of providers this module offers for embeddings. */
export const GetEmbeddingProviders = (): ApiProvider[] => {
	return [
		ApiProvider.OpenAI,
		ApiProvider.SiliconFlow,
		ApiProvider.Google,
		ApiProvider.AlibabaQwen,
		ApiProvider.OpenAICompatible,
		ApiProvider.Ollama,
	]
}

/**
 * Returns the model table for a provider.
 *
 * OpenRouter is resolved through `fetchOpenRouterModels()`; every other
 * provider maps to a static table in this file. Ollama, OpenAICompatible and
 * any unrecognised provider yield an empty map.
 *
 * @param provider Provider whose models are requested.
 * @returns Map from model id to `ModelInfo`.
 */
export const GetProviderModels = async (provider: ApiProvider): Promise<Record<string, ModelInfo>> => {
	switch (provider) {
		case ApiProvider.Infio:
			return infioModels
		case ApiProvider.OpenRouter:
			return await fetchOpenRouterModels()
		case ApiProvider.OpenAI:
			return openAiNativeModels
		case ApiProvider.AlibabaQwen:
			return qwenModels
		case ApiProvider.SiliconFlow:
			return siliconFlowModels
		case ApiProvider.Anthropic:
			return anthropicModels
		case ApiProvider.Deepseek:
			return deepSeekModels
		case ApiProvider.Google:
			return geminiModels
		case ApiProvider.Groq:
			return groqModels
		case ApiProvider.Grok:
			return grokModels
		case ApiProvider.Ollama:
			return {}
		case ApiProvider.OpenAICompatible:
			return {}
		default:
			return {}
	}
}

/**
 * Returns the model ids available for a provider.
 *
 * @param provider Provider whose model ids are requested.
 * @returns Keys of the map produced by `GetProviderModels(provider)`.
 */
export const GetProviderModelIds = async (provider: ApiProvider): Promise<string[]> => {
	const models = await GetProviderModels(provider)
	return Object.keys(models)
}

/// Embedding models

/**
 * Returns the static embedding-model table for a provider.
 *
 * Only Google, SiliconFlow, OpenAI and AlibabaQwen have tables here; any other
 * provider yields an empty map.
 *
 * @param provider Provider whose embedding models are requested.
 * @returns Map from embedding model id to `EmbeddingModelInfo`.
 */
export const GetEmbeddingProviderModels = (provider: ApiProvider): Record<string, EmbeddingModelInfo> => {
	switch (provider) {
		case ApiProvider.Google:
			return geminiEmbeddingModels
		case ApiProvider.SiliconFlow:
			return siliconFlowEmbeddingModels
		case ApiProvider.OpenAI:
			return openAINativeEmbeddingModels;
		case ApiProvider.AlibabaQwen:
			return qwenEmbeddingModels;
		default:
			return {}
	}
}

/**
 * Returns the embedding model ids available for a provider.
 *
 * @param provider Provider whose embedding model ids are requested.
 * @returns Keys of the map produced by `GetEmbeddingProviderModels(provider)`.
 */
export const GetEmbeddingProviderModelIds = (provider: ApiProvider): string[] => {
	return Object.keys(GetEmbeddingProviderModels(provider))
}

/**
 * Looks up the embedding model info for a provider and model id.
 *
 * NOTE(review): this is a plain index lookup, so an unknown `modelId` (or a
 * provider with no table) produces `undefined` at runtime even though the
 * declared return type is `EmbeddingModelInfo`. The signature is kept for
 * backward compatibility; callers should guard the result.
 *
 * @param provider Provider that owns the embedding model.
 * @param modelId Embedding model id to look up.
 * @returns The matching entry from the provider's embedding table.
 */
export const GetEmbeddingModelInfo = (provider: ApiProvider, modelId: string): EmbeddingModelInfo => {
	const models = GetEmbeddingProviderModels(provider)
	return models[modelId]
}