DECADE / model_zoo.py
anonymous-penguin's picture
Initial code release
9c60174 verified
# Lookup table from a short model alias to a 2-tuple of strings.
# Per the first section's note the tuple is (deployment string, api_version);
# entries that specify no api_version carry an empty string in that slot.
model_zoo = {
    # --- OpenAI / Azure-hosted models: (deployment string, api_version) ---
    "gpt-5": ("gpt-5_2025-08-07", "2024-12-01-preview"),
    "gpt-4.1-azure": ("gpt-4.1_2025-04-14", "2025-04-01-preview"),
    "gpt-4o": ("gpt-4o_2024-11-20", "2024-10-21"),
    "gpt-4o-mini": ("gpt-4o-mini", ""),
    "gpt-5-openai": ("gpt-5", ""),
    "gpt-5-mini-openai": ("gpt-5-mini", ""),

    # --- vLLM-hosted models (OpenAI-compatible server) ---
    "Qwen3-30B-A3B-Instruct-2507": ("Qwen/Qwen3-30B-A3B-Instruct-2507", ""),
    "Qwen3-VL-30B-A3B-Instruct": ("Qwen3-VL-30B-A3B-Instruct", ""),

    # --- Anthropic models via the direct Anthropic API (ANTHROPIC_API_KEY) ---
    "claude-opus-4-6": ("claude-opus-4-6", ""),
    "claude-sonnet-4-6": ("claude-sonnet-4-6", ""),

    # --- Anthropic / DeepSeek via an OpenAI-compatible LiteLLM proxy ---
    # Uses LITELLM_API_KEY; selected by main.py's --tritonai flag.
    "claude-opus-4-6-tritonai": ("us.anthropic.claude-opus-4-6-v1", ""),
    "claude-sonnet-4-6-tritonai": ("us.anthropic.claude-sonnet-4-6-v1", ""),
    "deepseek-r1-tritonai": ("us.deepseek.r1-v1:0", ""),

    # --- Models behind an OpenAI-compatible inference API (NV_API_KEY) ---
    "gpt-5.1": ("openai/openai/gpt-5.1", ""),
    "gpt-5.2": ("openai/openai/gpt-5.2", ""),
    "gpt-5.5": ("openai/openai/gpt-5.5", ""),
    "gpt-4.1": ("us/azure/openai/gpt-4.1", ""),
    "Qwen3.5-397B-A17B": ("nvidia/qwen/qwen3-5-397b-a17b", ""),
    "Kimi-K2.6": ("nvidia/moonshotai/kimi-k2.6", ""),
}