2025-04-29 18:37:26 +08:00

1571 lines
41 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { OPENROUTER_BASE_URL } from '../constants'
import { ApiProvider } from '../types/llm/model'
/**
 * Static capability and pricing metadata describing one chat/completion model.
 *
 * Every field except `supportsPromptCache` is optional so that partially
 * known models can still be listed.
 */
export interface ModelInfo {
  /** Completion-token limit (the OpenRouter loader fills this from `max_completion_tokens`). */
  maxTokens?: number
  /** Context length of the model, in tokens. */
  contextWindow?: number
  /** Whether image input is accepted. */
  supportsImages?: boolean
  /** Computer-use capability flag; in the visible tables only Anthropic entries set it. */
  supportsComputerUse?: boolean
  /** Prompt-caching capability flag. This value is hardcoded for now. */
  supportsPromptCache: boolean
  /**
   * Input price. The Anthropic table annotates its values as USD per million
   * tokens; NOTE(review): confirm the unit for the other providers.
   */
  inputPrice?: number
  /** Output price, same unit as `inputPrice`. */
  outputPrice?: number
  /** Price charged for writing to the prompt cache. */
  cacheWritesPrice?: number
  /** Price charged for reading from the prompt cache. */
  cacheReadsPrice?: number
  /** Free-form description of the model. */
  description?: string
  /** Reasoning-effort preset; the OpenAI table uses "low", "medium" and "high". */
  reasoningEffort?: string
  /** Set on the `:thinking` variants of a model (and explicitly `false` on their plain twins). */
  thinking?: boolean
}
/** Static metadata describing one embedding model. */
export interface EmbeddingModelInfo {
  /** Dimensionality recorded for the model's embedding vectors. */
  dimensions: number
  /** Free-form description of the model. */
  description?: string
}
// Infio
// https://infio.app/pricing

/** Models offered through the Infio provider, keyed by model id. */
export const infioModels = {
  "deepseek-chat": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    // Context caching is supported, but not the way Anthropic does it: DeepSeek
    // reports input tokens and cache reads/writes in the same usage report.
    // FIXME: we need to show users cache stats how deepseek does it
    supportsPromptCache: true,
    // Technically there is no input price; everything is either a cache hit
    // or a cache miss (ApiOptions will not show this).
    inputPrice: 0,
    outputPrice: 0.28,
    cacheWritesPrice: 0.14,
    cacheReadsPrice: 0.014,
  },
  "deepseek-reasoner": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    // Same caching caveat as "deepseek-chat".
    // FIXME: we need to show users cache stats how deepseek does it
    supportsPromptCache: true,
    // No separate input price: cache hit or cache miss only (ApiOptions will not show this).
    inputPrice: 0,
    outputPrice: 2.19,
    cacheWritesPrice: 0.55,
    cacheReadsPrice: 0.14,
  },
  "o3-mini": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1.1,
    outputPrice: 4.4,
  },
  // don't support tool use yet
  "o1": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 15,
    outputPrice: 60,
  },
  "o1-preview": {
    maxTokens: 32_768,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 15,
    outputPrice: 60,
  },
  "o1-mini": {
    maxTokens: 65_536,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 1.1,
    outputPrice: 4.4,
  },
  "gpt-4o": {
    maxTokens: 4_096,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 2.5,
    outputPrice: 10,
  },
  "gpt-4o-mini": {
    maxTokens: 16_384,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.15,
    outputPrice: 0.6,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `infioModels`. */
export type InfioModelId = keyof typeof infioModels

/** Model id used for Infio when none is chosen explicitly. */
export const infioDefaultModelId: InfioModelId = "deepseek-chat"
// Anthropic
// https://docs.anthropic.com/en/docs/about-claude/models

/**
 * Anthropic models keyed by model id.
 * Prices are in USD per million tokens.
 */
export const anthropicModels = {
  // Variant of Claude 3.7 Sonnet flagged with `thinking: true`.
  "claude-3-7-sonnet-20250219:thinking": {
    maxTokens: 128_000,
    contextWindow: 200_000,
    supportsImages: true,
    supportsComputerUse: true,
    supportsPromptCache: true,
    inputPrice: 3, // $3 per million input tokens
    outputPrice: 15, // $15 per million output tokens
    cacheWritesPrice: 3.75, // $3.75 per million tokens
    cacheReadsPrice: 0.3, // $0.30 per million tokens
    thinking: true,
  },
  "claude-3-7-sonnet-20250219": {
    maxTokens: 8_192,
    contextWindow: 200_000,
    supportsImages: true,
    supportsComputerUse: true,
    supportsPromptCache: true,
    inputPrice: 3, // $3 per million input tokens
    outputPrice: 15, // $15 per million output tokens
    cacheWritesPrice: 3.75, // $3.75 per million tokens
    cacheReadsPrice: 0.3, // $0.30 per million tokens
    thinking: false,
  },
  "claude-3-5-sonnet-20241022": {
    maxTokens: 8_192,
    contextWindow: 200_000,
    supportsImages: true,
    supportsComputerUse: true,
    supportsPromptCache: true,
    inputPrice: 3, // $3 per million input tokens
    outputPrice: 15, // $15 per million output tokens
    cacheWritesPrice: 3.75, // $3.75 per million tokens
    cacheReadsPrice: 0.3, // $0.30 per million tokens
  },
  "claude-3-5-haiku-20241022": {
    maxTokens: 8_192,
    contextWindow: 200_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 1,
    outputPrice: 5,
    cacheWritesPrice: 1.25,
    cacheReadsPrice: 0.1,
  },
  "claude-3-opus-20240229": {
    maxTokens: 4_096,
    contextWindow: 200_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 15,
    outputPrice: 75,
    cacheWritesPrice: 18.75,
    cacheReadsPrice: 1.5,
  },
  "claude-3-haiku-20240307": {
    maxTokens: 4_096,
    contextWindow: 200_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 0.25,
    outputPrice: 1.25,
    cacheWritesPrice: 0.3,
    cacheReadsPrice: 0.03,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `anthropicModels`. */
export type AnthropicModelId = keyof typeof anthropicModels

/** Model id used for Anthropic when none is chosen explicitly. */
export const anthropicDefaultModelId: AnthropicModelId = "claude-3-7-sonnet-20250219"
// OpenRouter
// https://openrouter.ai/models?order=newest&supported_parameters=tools

/** Default OpenRouter model id; will always exist in openRouterModels. */
export const openRouterDefaultModelId = "anthropic/claude-3.5-sonnet"

/**
 * Metadata for the default OpenRouter model. It is also what the model-list
 * loader returns when the live catalogue cannot be fetched.
 */
export const openRouterDefaultModelInfo: ModelInfo = {
  maxTokens: 8_192,
  contextWindow: 200_000,
  supportsImages: true,
  supportsComputerUse: true,
  supportsPromptCache: true,
  inputPrice: 3,
  outputPrice: 15,
  cacheWritesPrice: 3.75,
  cacheReadsPrice: 0.3,
  description:
    "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal",
}
/** Memo of the OpenRouter catalogue; stays `null` until a fetch succeeds. */
let openRouterModelsCache: Record<string, ModelInfo> | null = null;

/**
 * Loads the OpenRouter model catalogue and maps it onto `ModelInfo` records.
 *
 * The first successful result is memoised in `openRouterModelsCache` and
 * returned by every later call. Failures are never cached, so a later call
 * retries the request.
 *
 * @returns A map from model id to its metadata. If the request fails, returns
 *   a non-success status, or does not contain a `data` array, the map holds
 *   only the default model instead.
 */
async function fetchOpenRouterModels(): Promise<Record<string, ModelInfo>> {
  if (openRouterModelsCache) {
    return openRouterModelsCache;
  }

  // OpenRouter quotes prices as decimal strings in USD per token. Convert them
  // to numbers in USD per million tokens so fetched entries use the same unit
  // as `openRouterDefaultModelInfo`. Missing or unparsable values become 0.
  const toPricePerMillion = (raw: unknown): number => {
    const perToken = typeof raw === "number" ? raw : parseFloat(String(raw));
    return Number.isFinite(perToken) ? perToken * 1_000_000 : 0;
  };

  try {
    const response = await fetch(OPENROUTER_BASE_URL + "/models");
    if (!response.ok) {
      throw new Error(`OpenRouter model list request failed with HTTP ${response.status}`);
    }

    const payload = await response.json();
    // Treat a payload without a model array as a failure. Caching an empty
    // map here would be permanent, because `{}` is truthy in the check above.
    if (!Array.isArray(payload?.data)) {
      throw new Error("OpenRouter model list response has no `data` array");
    }

    const models: Record<string, ModelInfo> = {};
    for (const model of payload.data) {
      // Skip malformed entries rather than registering them under "undefined".
      if (typeof model?.id !== "string") {
        continue;
      }
      models[model.id] = {
        maxTokens: model.top_provider?.max_completion_tokens ?? model.context_length,
        contextWindow: model.context_length,
        supportsImages: model.architecture?.modality?.includes("image") ?? false,
        supportsPromptCache: false,
        inputPrice: toPricePerMillion(model.pricing?.prompt),
        outputPrice: toPricePerMillion(model.pricing?.completion),
        description: model.description,
      };
    }

    openRouterModelsCache = models;
    return models;
  } catch (error) {
    console.error('Failed to fetch OpenRouter models:', error);
    // Fall back to the one model that is always known locally.
    return {
      [openRouterDefaultModelId]: openRouterDefaultModelInfo
    };
  }
}
// Gemini
// https://ai.google.dev/gemini-api/docs/models/gemini

/** Google Gemini models keyed by model id. */
export const geminiModels = {
  // Variant of 2.5 Flash Preview flagged with `thinking: true`.
  "gemini-2.5-flash-preview-04-17:thinking": {
    maxTokens: 65_535,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.15,
    outputPrice: 3.5,
    thinking: true,
    // maxThinkingTokens: 24_576,
  },
  "gemini-2.5-flash-preview-04-17": {
    maxTokens: 65_535,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.15,
    outputPrice: 0.6,
    thinking: false,
  },
  "gemini-2.5-pro-exp-03-25": {
    maxTokens: 65_535,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-2.5-pro-preview-03-25": {
    maxTokens: 65_535,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: true,
    // This is the pricing for prompts above 200k tokens.
    inputPrice: 2.5,
    outputPrice: 15,
    cacheReadsPrice: 0.625,
    cacheWritesPrice: 4.5,
  },
  "gemini-2.0-flash-001": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 0.1,
    outputPrice: 0.4,
    cacheReadsPrice: 0.025,
    cacheWritesPrice: 1,
  },
  "gemini-2.0-flash-lite-preview-02-05": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-2.0-pro-exp-02-05": {
    maxTokens: 8_192,
    contextWindow: 2_097_152,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-2.0-flash-thinking-exp-01-21": {
    maxTokens: 65_536,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-2.0-flash-thinking-exp-1219": {
    maxTokens: 8_192,
    contextWindow: 32_767,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-2.0-flash-exp": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-1.5-flash-002": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-1.5-flash-exp-0827": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-1.5-flash-8b-exp-0827": {
    maxTokens: 8_192,
    contextWindow: 1_048_576,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-1.5-pro-002": {
    maxTokens: 8_192,
    contextWindow: 2_097_152,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-1.5-pro-exp-0827": {
    maxTokens: 8_192,
    contextWindow: 2_097_152,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
  "gemini-exp-1206": {
    maxTokens: 8_192,
    contextWindow: 2_097_152,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `geminiModels`. */
export type GeminiModelId = keyof typeof geminiModels

/** Model id used for Gemini when none is chosen explicitly. */
export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-flash-preview-04-17"
/** Gemini embedding models keyed by model id. */
export const geminiEmbeddingModels = {
  "text-embedding-004": {
    dimensions: 768,
    description:
      "The text-embedding-004 model can generate advanced embeddings for words, phrases, and sentences. The resulting embeddings can then be used for tasks such as semantic search, text classification, clustering, and more.",
  },
} as const satisfies Record<string, EmbeddingModelInfo>
// OpenAI Native
// https://openai.com/api/pricing/

/**
 * OpenAI models keyed by model id. The three "o3-mini*" entries differ only
 * in their `reasoningEffort` preset.
 */
export const openAiNativeModels = {
  // don't support tool use yet
  "o3-mini": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 1.1,
    outputPrice: 4.4,
    reasoningEffort: "medium",
  },
  "o3-mini-high": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 1.1,
    outputPrice: 4.4,
    reasoningEffort: "high",
  },
  "o3-mini-low": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 1.1,
    outputPrice: 4.4,
    reasoningEffort: "low",
  },
  "o1-pro": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 150,
    outputPrice: 600,
  },
  "o1": {
    maxTokens: 100_000,
    contextWindow: 200_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 15,
    outputPrice: 60,
  },
  "o1-preview": {
    maxTokens: 32_768,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 15,
    outputPrice: 60,
  },
  "o1-mini": {
    maxTokens: 65_536,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 1.1,
    outputPrice: 4.4,
  },
  "gpt-4.5-preview": {
    maxTokens: 16_384,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 75,
    outputPrice: 150,
  },
  "gpt-4o": {
    maxTokens: 16_384,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 2.5,
    outputPrice: 10,
  },
  "gpt-4o-mini": {
    maxTokens: 16_384,
    contextWindow: 128_000,
    supportsImages: true,
    supportsPromptCache: true,
    inputPrice: 0.15,
    outputPrice: 0.6,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `openAiNativeModels`. */
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

/** Model id used for OpenAI when none is chosen explicitly. */
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
/** OpenAI embedding models keyed by model id. */
export const openAINativeEmbeddingModels = {
  "text-embedding-3-small": {
    dimensions: 1_536,
    description: "Increased performance over 2nd generation ada embedding model",
  },
  "text-embedding-3-large": {
    dimensions: 3_072,
    description: "Most capable embedding model for both English and non-English tasks",
  },
  "text-embedding-ada-002": {
    dimensions: 1_536,
    description: "Most capable 2nd generation embedding model, replacing 16 first generation models",
  },
} as const satisfies Record<string, EmbeddingModelInfo>
// DeepSeek
// https://api-docs.deepseek.com/quick_start/pricing

/** DeepSeek models keyed by model id. */
export const deepSeekModels = {
  "deepseek-chat": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    // Context caching is supported, but not the way Anthropic does it: DeepSeek
    // reports input tokens and cache reads/writes in the same usage report.
    // FIXME: we need to show users cache stats how deepseek does it
    supportsPromptCache: true,
    // Technically there is no input price; everything is either a cache hit
    // or a cache miss (ApiOptions will not show this).
    inputPrice: 0,
    outputPrice: 0.28,
    cacheWritesPrice: 0.14,
    cacheReadsPrice: 0.014,
  },
  "deepseek-reasoner": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    // Same caching caveat as "deepseek-chat".
    // FIXME: we need to show users cache stats how deepseek does it
    supportsPromptCache: true,
    // No separate input price: cache hit or cache miss only (ApiOptions will not show this).
    inputPrice: 0,
    outputPrice: 2.19,
    cacheWritesPrice: 0.55,
    cacheReadsPrice: 0.14,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `deepSeekModels`. */
export type DeepSeekModelId = keyof typeof deepSeekModels

/** Model id used for DeepSeek when none is chosen explicitly. */
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
// Qwen
// https://help.aliyun.com/zh/model-studio/getting-started/

/**
 * Models listed for the Qwen provider, keyed by model id. The table also
 * carries "deepseek-v3" and "deepseek-r1" entries.
 */
export const qwenModels = {
  "qwen3-235b-a22b": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
    cacheWritesPrice: 0.002,
    cacheReadsPrice: 0.006,
  },
  "qwen3-32b": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
  },
  "qwen3-30b-a3b": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
  },
  "qwen3-14b": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
  },
  "qwen3-8b": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
  },
  "qwen2.5-coder-32b-instruct": {
    maxTokens: 8_192,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
    cacheWritesPrice: 0.002,
    cacheReadsPrice: 0.006,
  },
  "qwen2.5-coder-14b-instruct": {
    maxTokens: 8_192,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.002,
    outputPrice: 0.006,
    cacheWritesPrice: 0.002,
    cacheReadsPrice: 0.006,
  },
  "qwen2.5-coder-7b-instruct": {
    maxTokens: 8_192,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.001,
    outputPrice: 0.002,
    cacheWritesPrice: 0.001,
    cacheReadsPrice: 0.002,
  },
  "qwen2.5-coder-3b-instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
    cacheWritesPrice: 0,
    cacheReadsPrice: 0,
  },
  "qwen2.5-coder-1.5b-instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
    cacheWritesPrice: 0,
    cacheReadsPrice: 0,
  },
  "qwen2.5-coder-0.5b-instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
    cacheWritesPrice: 0,
    cacheReadsPrice: 0,
  },
  "qwen-coder-plus-latest": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 3.5,
    outputPrice: 7,
    cacheWritesPrice: 3.5,
    cacheReadsPrice: 7,
  },
  "qwen-plus-latest": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.8,
    outputPrice: 2,
    cacheWritesPrice: 0.8,
    cacheReadsPrice: 0.2,
  },
  "qwen-turbo-latest": {
    maxTokens: 1_000_000,
    contextWindow: 1_000_000,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.8,
    outputPrice: 2,
    cacheWritesPrice: 0.8,
    cacheReadsPrice: 2,
  },
  "qwen-max-latest": {
    maxTokens: 30_720,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2.4,
    outputPrice: 9.6,
    cacheWritesPrice: 2.4,
    cacheReadsPrice: 9.6,
  },
  "qwq-plus-latest": {
    maxTokens: 8_192,
    contextWindow: 131_071,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
    cacheWritesPrice: 0,
    cacheReadsPrice: 0,
  },
  "qwq-plus": {
    maxTokens: 8_192,
    contextWindow: 131_071,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0,
    outputPrice: 0,
    cacheWritesPrice: 0,
    cacheReadsPrice: 0,
  },
  "qwen-coder-plus": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 3.5,
    outputPrice: 7,
    cacheWritesPrice: 3.5,
    cacheReadsPrice: 7,
  },
  "qwen-plus": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.8,
    outputPrice: 2,
    cacheWritesPrice: 0.8,
    cacheReadsPrice: 0.2,
  },
  "qwen-turbo": {
    maxTokens: 1_000_000,
    contextWindow: 1_000_000,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.3,
    outputPrice: 0.6,
    cacheWritesPrice: 0.3,
    cacheReadsPrice: 0.6,
  },
  "qwen-max": {
    maxTokens: 30_720,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2.4,
    outputPrice: 9.6,
    cacheWritesPrice: 2.4,
    cacheReadsPrice: 9.6,
  },
  "deepseek-v3": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.28,
    cacheWritesPrice: 0.14,
    cacheReadsPrice: 0.014,
  },
  "deepseek-r1": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 2.19,
    cacheWritesPrice: 0.55,
    cacheReadsPrice: 0.14,
  },
  "qwen-vl-max": {
    maxTokens: 30_720,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 3,
    outputPrice: 9,
    cacheWritesPrice: 3,
    cacheReadsPrice: 9,
  },
  "qwen-vl-max-latest": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 3,
    outputPrice: 9,
    cacheWritesPrice: 3,
    cacheReadsPrice: 9,
  },
  "qwen-vl-plus": {
    maxTokens: 6_000,
    contextWindow: 8_000,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 1.5,
    outputPrice: 4.5,
    cacheWritesPrice: 1.5,
    cacheReadsPrice: 4.5,
  },
  "qwen-vl-plus-latest": {
    maxTokens: 129_024,
    contextWindow: 131_072,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 1.5,
    outputPrice: 4.5,
    cacheWritesPrice: 1.5,
    cacheReadsPrice: 4.5,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `qwenModels`. */
export type QwenModelId = keyof typeof qwenModels

/** Model id used for Qwen when none is chosen explicitly. */
export const qwenDefaultModelId: QwenModelId = "qwen-max-latest"
/**
 * Embedding models listed for the Qwen provider, keyed by model id.
 * The descriptions are runtime strings written in Chinese.
 */
export const qwenEmbeddingModels = {
  "text-embedding-v3": {
    dimensions: 1_024,
    description:
      "支持50+主流语种包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语等。最大行数20单行最大处理8,192 Token。支持可选维度1,024默认、768或512。单价0.0007元/千Token。免费额度50万Token有效期180天。",
  },
  "text-embedding-v2": {
    dimensions: 1_536,
    description:
      "支持多种语言包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。最大行数25单行最大处理2,048 Token。",
  },
  "text-embedding-v1": {
    dimensions: 1_536,
    description: "支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。",
  },
  "text-embedding-async-v2": {
    dimensions: 1_536,
    description:
      "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。",
  },
  "text-embedding-async-v1": {
    dimensions: 1_536,
    description: "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。",
  },
} as const satisfies Record<string, EmbeddingModelInfo>
// ByteDance Volcengine
// https://api.volcengine.com/api-docs/view/overview
// SiliconFlow
// https://docs.siliconflow.cn/

/**
 * Models listed for the SiliconFlow provider, keyed by model id. Some ids
 * carry a "Pro/", "LoRA/" or "Vendor-A/" prefix.
 */
export const siliconFlowModels = {
  "01-ai/Yi-1.5-9B-Chat-16K": {
    maxTokens: 8_192,
    contextWindow: 16_384,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "01-ai/Yi-1.5-34B-Chat-16K": {
    maxTokens: 8_192,
    contextWindow: 16_384,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "google/gemma-2-9b-it": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "google/gemma-2-27b-it": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "Pro/google/gemma-2-9b-it": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "meta-llama/Meta-Llama-3.1-8B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "Pro/meta-llama/Meta-Llama-3.1-8B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "meta-llama/Meta-Llama-3.1-70B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 5,
    outputPrice: 10,
  },
  "internlm/internlm2_5-20b-chat": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.8,
    outputPrice: 1.6,
  },
  "Qwen/Qwen2.5-72B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "Qwen/Qwen2.5-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "Qwen/Qwen2.5-14B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.6,
    outputPrice: 1.2,
  },
  "Qwen/Qwen2.5-32B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "Qwen/Qwen2.5-Coder-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "Qwen/Qwen2.5-VL-32B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "Qwen/Qwen2.5-VL-72B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "TeleAI/TeleChat2": {
    maxTokens: 4_096,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.3,
    outputPrice: 0.6,
  },
  "Pro/Qwen/Qwen2.5-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "Qwen/Qwen2.5-72B-Instruct-128K": {
    maxTokens: 8_192,
    contextWindow: 128_000,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "Qwen/Qwen2-VL-72B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 2.5,
    outputPrice: 5,
  },
  "OpenGVLab/InternVL2-26B": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "Pro/BAAI/bge-m3": {
    maxTokens: 4_096,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.3,
    outputPrice: 0.6,
  },
  "Pro/OpenGVLab/InternVL2-8B": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "Vendor-A/Qwen/Qwen2.5-72B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "Pro/Qwen/Qwen2-VL-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "LoRA/Qwen/Qwen2.5-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "Pro/Qwen/Qwen2.5-Coder-7B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.4,
    outputPrice: 0.8,
  },
  "LoRA/Qwen/Qwen2.5-72B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "Qwen/Qwen2.5-Coder-32B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "Pro/BAAI/bge-reranker-v2-m3": {
    maxTokens: 4_096,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.3,
    outputPrice: 0.6,
  },
  "Qwen/QwQ-32B": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "Qwen/QwQ-32B-Preview": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "AIDC-AI/Marco-o1": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "LoRA/Qwen/Qwen2.5-14B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.6,
    outputPrice: 1.2,
  },
  "LoRA/Qwen/Qwen2.5-32B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 1,
    outputPrice: 2,
  },
  "meta-llama/Llama-3.3-70B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 2,
    outputPrice: 4,
  },
  "LoRA/meta-llama/Meta-Llama-3.1-8B-Instruct": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "deepseek-ai/deepseek-vl2": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 0.5,
    outputPrice: 1,
  },
  "Qwen/QVQ-72B-Preview": {
    maxTokens: 8_192,
    contextWindow: 32_768,
    supportsImages: true,
    supportsPromptCache: false,
    inputPrice: 2.5,
    outputPrice: 5,
  },
  "deepseek-ai/DeepSeek-V3": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.28,
    cacheWritesPrice: 0.14,
    cacheReadsPrice: 0.014,
  },
  "deepseek-ai/DeepSeek-R1": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 2.19,
    cacheWritesPrice: 0.55,
    cacheReadsPrice: 0.14,
  },
  "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
    maxTokens: 4_096,
    contextWindow: 16_384,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.2,
    cacheWritesPrice: 0.1,
    cacheReadsPrice: 0.01,
  },
  "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.4,
    cacheWritesPrice: 0.2,
    cacheReadsPrice: 0.02,
  },
  "Pro/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.4,
    cacheWritesPrice: 0.2,
    cacheReadsPrice: 0.02,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.6,
    cacheWritesPrice: 0.3,
    cacheReadsPrice: 0.03,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 1,
    cacheWritesPrice: 0.5,
    cacheReadsPrice: 0.05,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 2,
    cacheWritesPrice: 1,
    cacheReadsPrice: 0.1,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
    maxTokens: 4_096,
    contextWindow: 16_384,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.2,
    cacheWritesPrice: 0.1,
    cacheReadsPrice: 0.01,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.4,
    cacheWritesPrice: 0.2,
    cacheReadsPrice: 0.02,
  },
  "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
    maxTokens: 8_000,
    contextWindow: 32_768,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.4,
    cacheWritesPrice: 0.2,
    cacheReadsPrice: 0.02,
  },
  "Pro/deepseek-ai/DeepSeek-R1": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 2.19,
    cacheWritesPrice: 0.55,
    cacheReadsPrice: 0.14,
  },
  "Pro/deepseek-ai/DeepSeek-V3": {
    maxTokens: 8_000,
    contextWindow: 64_000,
    supportsImages: false,
    supportsPromptCache: true,
    inputPrice: 0,
    outputPrice: 0.28,
    cacheWritesPrice: 0.14,
    cacheReadsPrice: 0.014,
  },
} as const satisfies Record<string, ModelInfo>

/** Union of the model ids listed in `siliconFlowModels`. */
export type SiliconFlowModelId = keyof typeof siliconFlowModels

/** Model id used for SiliconFlow when none is chosen explicitly. */
export const siliconFlowDefaultModelId: SiliconFlowModelId = "deepseek-ai/DeepSeek-V3"
/**
 * Embedding models listed for the SiliconFlow provider, keyed by model id.
 * The descriptions are runtime strings written in Chinese.
 */
export const siliconFlowEmbeddingModels = {
  "BAAI/bge-m3": {
    dimensions: 1_024,
    description:
      "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能",
  },
  "netease-youdao/bce-embedding-base_v1": {
    dimensions: 768,
    description:
      "bce-embedding-base_v1 是由网易有道开发的双语和跨语言嵌入模型。该模型在中英文语义表示和检索任务中表现出色尤其擅长跨语言场景。它是为检索增强生成RAG系统优化的可以直接应用于教育、医疗、法律等多个领域。该模型不需要特定指令即可使用能够高效地生成语义向量为语义搜索和问答系统提供关键支持",
  },
  "BAAI/bge-large-zh-v1.5": {
    dimensions: 1_024,
    description:
      "BAAI/bge-large-zh-v1.5 是一个大型中文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。该模型在 C-MTEB 基准测试中表现出色,在 31 个数据集上的平均得分为 64.53,在检索、语义相似度、文本对分类等多个任务中都取得了优异成绩。它支持最大 512 个 token 的输入长度,适用于各种中文自然语言处理任务,如文本检索、语义相似度计算等",
  },
  "BAAI/bge-large-en-v1.5": {
    dimensions: 1_024,
    description:
      "BAAI/bge-large-en-v1.5 是一个大型英文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。它在 MTEB 基准测试中取得了优异的表现,在 56 个数据集上的平均得分为 64.23,在检索、聚类、文本对分类等多个任务中表现出色。该模型支持最大 512 个 token 的输入长度,适用于各种自然语言处理任务,如文本检索、语义相似度计算等",
  },
  "Pro/BAAI/bge-m3": {
    dimensions: 1_024,
    description:
      "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能",
  },
} as const satisfies Record<string, EmbeddingModelInfo>
// Groq
// https://console.groq.com/docs/overview
export type GroqModelId = keyof typeof groqModels
export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile"
/**
 * Static catalog of Groq chat models, keyed by model id.
 *
 * Every entry records the output-token cap (`maxTokens`), the context window
 * size, whether image input is supported, and the prompt-cache flag (always
 * `false` in this table). Both `inputPrice` and `outputPrice` are recorded as
 * 0 for every model.
 *
 * Key order is significant: `Object.keys(groqModels)` is what callers receive
 * as the list of model ids, so entries must not be reordered casually.
 *
 * `as const satisfies Record<string, ModelInfo>` keeps the literal key types
 * (used by `keyof typeof groqModels`) while validating each entry's shape.
 */
export const groqModels = {
	"meta-llama/llama-4-scout-17b-16e-instruct": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"meta-llama/llama-4-maverick-17b-128e-instruct": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-qwq-32b": {
		maxTokens: 8192,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		// Previously omitted; listed explicitly so this entry carries both
		// price fields like every other model in the table.
		outputPrice: 0,
	},
	"llama-3.2-1b-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.1-8b-instant": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"mixtral-8x7b-32768": {
		maxTokens: 4096,
		contextWindow: 32768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.3-70b-versatile": {
		maxTokens: 4096,
		contextWindow: 32768,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-guard-3-8b": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama3-70b-8192": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.3-70b-specdec": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama3-8b-8192": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-11b-vision-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"deepseek-r1-distill-llama-70b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-3b-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-2.5-coder-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"gemma2-9b-it": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"deepseek-r1-distill-qwen-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-2.5-32b": {
		maxTokens: 4096,
		contextWindow: 131072,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"llama-3.2-90b-vision-preview": {
		maxTokens: 4096,
		contextWindow: 8192,
		supportsImages: true,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
// Grok
// https://docs.x.ai/docs/models
/** Union of the string keys declared in `grokModels`. */
export type GrokModelId = keyof typeof grokModels
/** Default Grok model id; the annotation forces it to be a key of `grokModels`. */
export const grokDefaultModelId: GrokModelId = "grok-3"
/**
 * Static catalog of Grok chat models, keyed by model id.
 *
 * All six entries share the same limits (8192 max output tokens, 131072-token
 * context window), have `supportsPromptCache: true`, and record both prices
 * as 0. The only field that varies is `supportsImages`, which is `true` for
 * the two `grok-2*` ids and `false` for the `grok-3*` ids.
 *
 * Key order is what `Object.keys(grokModels)` returns, so it is preserved.
 */
export const grokModels = {
	// --- grok-3 family: text only ---
	"grok-3": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-fast": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-mini": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-3-mini-fast": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	// --- grok-2 family: image input enabled ---
	"grok-2-latest": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
	"grok-2": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: true,
		supportsPromptCache: true,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>
/// helper functions

/**
 * Lists every provider known to the `ApiProvider` enum.
 *
 * @returns The array produced by `Object.values(ApiProvider)`; a new array on
 * every call.
 */
export const GetAllProviders = (): ApiProvider[] => Object.values(ApiProvider)
/**
 * Lists the providers that are offered for embeddings.
 *
 * The list is fixed and returned in this order: OpenAI, SiliconFlow, Google,
 * AlibabaQwen, OpenAICompatible, Ollama.
 *
 * @returns A freshly allocated array on every call.
 */
export const GetEmbeddingProviders = (): ApiProvider[] => {
	const { OpenAI, SiliconFlow, Google, AlibabaQwen, OpenAICompatible, Ollama } = ApiProvider
	return [OpenAI, SiliconFlow, Google, AlibabaQwen, OpenAICompatible, Ollama]
}
/**
 * Resolves the chat-model catalog for a provider.
 *
 * - `OpenRouter`: the catalog is whatever `fetchOpenRouterModels()` resolves to;
 *   a rejection from that call propagates to the caller.
 * - Infio, OpenAI, AlibabaQwen, SiliconFlow, Anthropic, Deepseek, Google, Groq
 *   and Grok: the matching static table declared in this module.
 * - Ollama, OpenAICompatible and any other value: a new empty object.
 *
 * @param provider Provider whose models are requested.
 * @returns A record of model id to `ModelInfo`.
 */
export const GetProviderModels = async (provider: ApiProvider): Promise<Record<string, ModelInfo>> => {
	// OpenRouter is the only provider resolved through an awaited call.
	if (provider === ApiProvider.OpenRouter) {
		return await fetchOpenRouterModels()
	}

	// A Map (rather than a plain object) so that only the listed providers can match.
	const staticCatalogs = new Map<ApiProvider, Record<string, ModelInfo>>([
		[ApiProvider.Infio, infioModels],
		[ApiProvider.OpenAI, openAiNativeModels],
		[ApiProvider.AlibabaQwen, qwenModels],
		[ApiProvider.SiliconFlow, siliconFlowModels],
		[ApiProvider.Anthropic, anthropicModels],
		[ApiProvider.Deepseek, deepSeekModels],
		[ApiProvider.Google, geminiModels],
		[ApiProvider.Groq, groqModels],
		[ApiProvider.Grok, grokModels],
	])

	// Providers without a table here (including Ollama and OpenAICompatible) get an empty catalog.
	return staticCatalogs.get(provider) ?? {}
}
/**
 * Resolves the model ids available for a provider.
 *
 * @param provider Provider whose model ids are requested.
 * @returns The keys of the record resolved by `GetProviderModels(provider)`.
 */
export const GetProviderModelIds = async (provider: ApiProvider): Promise<string[]> =>
	Object.keys(await GetProviderModels(provider))
/// Embedding models

/**
 * Looks up the embedding-model catalog for a provider.
 *
 * Google, SiliconFlow, OpenAI and AlibabaQwen each map to their static table;
 * every other provider yields a new empty object.
 *
 * @param provider Provider whose embedding models are requested.
 * @returns A record of model id to `EmbeddingModelInfo`.
 */
export const GetEmbeddingProviderModels = (provider: ApiProvider): Record<string, EmbeddingModelInfo> => {
	if (provider === ApiProvider.Google) {
		return geminiEmbeddingModels
	}
	if (provider === ApiProvider.SiliconFlow) {
		return siliconFlowEmbeddingModels
	}
	if (provider === ApiProvider.OpenAI) {
		return openAINativeEmbeddingModels
	}
	if (provider === ApiProvider.AlibabaQwen) {
		return qwenEmbeddingModels
	}
	// No embedding table is declared for the remaining providers.
	return {}
}
/**
 * Lists the embedding model ids available for a provider.
 *
 * @param provider Provider whose embedding model ids are requested.
 * @returns The keys of `GetEmbeddingProviderModels(provider)`; empty when the
 * provider has no embedding table.
 */
export const GetEmbeddingProviderModelIds = (provider: ApiProvider): string[] => {
	const catalog = GetEmbeddingProviderModels(provider)
	return Object.keys(catalog)
}
/**
 * Fetches the embedding metadata for one model of a provider.
 *
 * NOTE(review): the lookup is a plain index into the provider's record, so an
 * id that is not in the table evaluates to `undefined` at runtime even though
 * the declared return type is `EmbeddingModelInfo` - callers should confirm the
 * id is valid (or guard the result) before reading `dimensions`.
 *
 * @param provider Provider that owns the model.
 * @param modelId Model id to look up in that provider's embedding table.
 * @returns The entry stored under `modelId`.
 */
export const GetEmbeddingModelInfo = (provider: ApiProvider, modelId: string): EmbeddingModelInfo =>
	GetEmbeddingProviderModels(provider)[modelId]