Back to library · dev
LiteLLM (LLM Router)
Skill for LiteLLM (LLM Router) — auto-generated from documentation
by skynet · v1.0.0
litellm · dev · auto-generated
0
Total Uses
0
Successes
0%
Success Rate
Compatible Agents
claude-code, codex, gemini
Instruction
---
name: LiteLLM Router
description: Use when you need to route LLM requests across multiple providers, implement load balancing, handle fallbacks, or manage LLM costs and rate limits in production applications.
metadata:
  author: skynet
  version: 1.0.0
  category: dev
---
# LiteLLM Router
## Overview
LiteLLM is a unified interface for 100+ LLMs that provides routing, load balancing, fallback handling, and cost tracking across multiple AI providers.
## Installation & Setup
```bash
# Install LiteLLM (the async APIs such as acompletion ship in the base
# package; no separate "async" extra is needed)
pip install litellm
# For proxy server
pip install 'litellm[proxy]'
```
## Basic Usage
### Simple API Call
```python
import litellm
import os

# Provide the provider credentials through the process environment
os.environ.update(
    {
        "OPENAI_API_KEY": "your-openai-key",
        "ANTHROPIC_API_KEY": "your-anthropic-key",
    }
)

# Send a single-turn chat request
response = litellm.completion(
    messages=[{"role": "user", "content": "Hello!"}],
    model="gpt-3.5-turbo",
)

# The reply text sits on the first choice of the response
print(response.choices[0].message.content)
```
### Multi-Provider Setup
```python
import litellm


def _explain_ai(model):
    """Ask the given model the same "Explain AI" question."""
    return litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Explain AI"}],
    )


# Configure multiple providers: only the model name changes per call
response = _explain_ai("claude-3-sonnet-20240229")  # Anthropic
response = _explain_ai("gemini-pro")  # Google
```
## Router Configuration
### Basic Router Setup
```python
import os

from litellm import Router

# Two deployments are registered under the same model_name
# ("gpt-3.5-turbo"), so a request for that name can be served by either.
# tpm / rpm are the tokens-per-minute and requests-per-minute limits of
# each deployment.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.environ["AZURE_API_KEY"],
            "api_version": "2023-07-01-preview",
            "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
        },
        "tpm": 240000,
        "rpm": 1800,
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.environ["OPENAI_API_KEY"],
        },
        "tpm": 1000000,
        "rpm": 9000,
    },
]
router = Router(model_list=model_list)

# Use router: address the shared model_name, not an individual deployment
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)
```
### Advanced Router with Load Balancing
```python
import asyncio
import os

from litellm import Router

# Both deployments answer to the shared name "claude-3"; model_info.id
# gives each one a distinct identifier.
model_list = [
    {
        "model_name": "claude-3",
        "litellm_params": {
            "model": "claude-3-sonnet-20240229",
            "api_key": os.environ["ANTHROPIC_API_KEY"],
        },
        "model_info": {"id": "claude-fallback-1"},
    },
    {
        "model_name": "claude-3",
        "litellm_params": {
            "model": "claude-3-haiku-20240307",
            "api_key": os.environ["ANTHROPIC_API_KEY"],
        },
        "model_info": {"id": "claude-fallback-2"},
    },
]
router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing-v2",
    set_verbose=True,
    num_retries=3,
)


# Async usage
async def make_request():
    """Send one chat request to the "claude-3" group through the router."""
    response = await router.acompletion(
        model="claude-3",
        messages=[{"role": "user", "content": "Complex analysis task"}],
    )
    return response


# Run async
response = asyncio.run(make_request())
```
## Proxy Server
### Start Proxy Server
```bash
# Create config file
# The heredoc delimiter is quoted so the shell writes the YAML verbatim;
# the nesting below is significant to the YAML parser.
cat > config.yaml << 'EOF'
model_list:
  - model_name: gpt-4
    litellm_params:
      model: gpt-4
      api_key: os.environ/OPENAI_API_KEY
  - model_name: claude-3
    litellm_params:
      model: claude-3-sonnet-20240229
      api_key: os.environ/ANTHROPIC_API_KEY
router_settings:
  routing_strategy: usage-based-routing-v2
  model_group_alias:
    gpt-4: gpt-4
    claude: claude-3
general_settings:
  master_key: sk-1234567890
  database_url: "postgresql://user:pass@localhost:5432/litellm"
EOF
# Pick ONE of the launch commands below.
# Start proxy
litellm --config config.yaml --detailed_debug
# Start with custom port
litellm --config config.yaml --port 8080
# Start with SSL
litellm --config config.yaml --ssl_keyfile_path key.pem --ssl_certfile_path cert.pem
```
### Use Proxy Client
```python
import openai

# The stock OpenAI SDK is redirected to the LiteLLM proxy via base_url
client = openai.OpenAI(
    base_url="http://localhost:4000",  # LiteLLM proxy URL
    api_key="sk-1234567890",  # Your master key
)

# The request goes to the proxy, addressed by model name
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="gpt-4",
)
```
## Cost Tracking & Budgets
### Track Usage and Costs
```python
from litellm import completion, completion_cost

response = completion(
    messages=[{"role": "user", "content": "Expensive query"}],
    model="gpt-4",
)

# Cost of this single call, derived from the response object
cost = completion_cost(completion_response=response)
print(f"Cost: ${cost}")

# Token counts are reported on the response's usage record
usage = response.usage
print(f"Prompt tokens: {usage.prompt_tokens}")
print(f"Completion tokens: {usage.completion_tokens}")
```
### Set Budget Limits
```python
from litellm import Router

# Router's constructor has no `max_budget` / `budget_duration` keyword
# arguments; spend caps are declared per provider instead.
# NOTE(review): provider budgets need a recent LiteLLM release - confirm
# against the installed version, and list every provider you route to.
provider_budget_config = {
    "openai": {"budget_limit": 100.0, "time_period": "1d"},  # $100 per day
    "anthropic": {"budget_limit": 100.0, "time_period": "1d"},  # $100 per day
}

# Router with budget controls
router = Router(
    model_list=model_list,
    routing_strategy="cost-based-routing",
    provider_budget_config=provider_budget_config,
)
```
## Decision Tree: Choosing Configuration
```
Need LLM routing?
├── Simple fallback only
│   └── Use basic Router with 2-3 models
├── Production load balancing
│   ├── High traffic
│   │   └── Use proxy server + usage-based routing
│   └── Cost optimization
│       └── Use cost-based routing + budgets
└── Development/Testing
    └── Use direct litellm.completion() calls
```
## Environment Variables
```bash
# Provider credentials exported in the shell, as an alternative to
# assigning os.environ[...] inside Python code.
# OpenAI
export OPENAI_API_KEY="sk-..."
# Anthropic
export ANTHROPIC_API_KEY="sk-ant-..."
# Azure OpenAI (key, endpoint and API version of the Azure deployment)
export AZURE_API_KEY="..."
export AZURE_API_BASE="https://..."
export AZURE_API_VERSION="2023-07-01-preview"
# Google (path to a credentials JSON file)
export GOOGLE_APPLICATION_CREDENTIALS="path/to/credentials.json"
# Cohere
export COHERE_API_KEY="..."
# Replicate
export REPLICATE_API_TOKEN="..."
```
## Logging & Monitoring
```python
import litellm

# Enable verbose logging
litellm.set_verbose = True


# Custom callback
def custom_callback(kwargs, completion_response, start_time, end_time):
    """Print the model, cost and duration of one successful call.

    Args:
        kwargs: Call arguments; ``kwargs['model']`` is the model name.
        completion_response: Response object passed to ``completion_cost``.
        start_time: Start timestamp of the call.
        end_time: End timestamp of the call.
    """
    print(f"Model: {kwargs['model']}")
    print(f"Cost: {litellm.completion_cost(completion_response)}")
    print(f"Duration: {end_time - start_time}")


# Add callbacks: the custom function plus the built-in Langfuse integration.
# Langfuse is enabled by name; it reads LANGFUSE_PUBLIC_KEY and
# LANGFUSE_SECRET_KEY from the environment.
litellm.success_callback = [custom_callback, "langfuse"]
```
## Troubleshooting
### Common Errors
**Error**: `AuthenticationError: Invalid API key`
```python
# Check environment variables
import os

# Report only whether the key is present; never echo the secret itself,
# since troubleshooting output tends to end up in logs and bug reports.
openai_key_status = "set" if os.environ.get("OPENAI_API_KEY") else "Not set"
print("OpenAI Key:", openai_key_status)

# Verify in router config
model_list = [
    {
        "model_name": "gpt-4",
        "litellm_params": {
            "model": "gpt-4",
            "api_key": "verify-this-key-is-correct",
        },
    }
]
```
**Error**: `RateLimitError: Rate limit exceeded`
```python
from litellm import Router

# Set TPM/RPM limits on the deployment (defined first so the Router below
# can use it)
model_list = [
    {
        "model_name": "gpt-4",
        "litellm_params": {"model": "gpt-4"},
        "tpm": 40000,  # Tokens per minute
        "rpm": 500,  # Requests per minute
    }
]

# Add retry logic and rate limiting
router = Router(
    model_list=model_list,
    num_retries=3,
    timeout=30,
    cooldown_time=1,  # Seconds a failing deployment is kept out of rotation
)
```
**Error**: `Model not found`
```bash
# Check supported models (prints litellm.model_list)
python -c "import litellm; print(litellm.model_list)"
# Verify model name format: use the full model identifier
# Correct: "gpt-3.5-turbo", "claude-3-sonnet-20240229"
# Incorrect: "gpt3.5", "claude3"
```
**Error**: `Connection timeout`
```python
import litellm
from litellm import Router
from litellm.router import RetryPolicy

# Increase timeout
response = litellm.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    timeout=60,  # 60 seconds
)

# For router
router = Router(
    model_list=model_list,
    timeout=120,
    # Retry counts are declared per exception type on a RetryPolicy;
    # it has no "retries" / "backoff_factor" fields.
    retry_policy=RetryPolicy(TimeoutErrorRetries=3),
)
```
### Debug Mode
```python
# Enable detailed debugging
import litellm

litellm.set_verbose = True

# Check router health
print(router.get_model_list())
# get_available_deployment() requires the model group to select from;
# pass a model_name that exists in your model_list.
print(router.get_available_deployment(model="gpt-4"))

# Test specific model
try:
    response = litellm.completion(
        model="gpt-4",
        messages=[{"role": "user", "content": "test"}],
        timeout=30,
    )
except Exception as e:  # broad on purpose: this is a diagnostic probe
    print(f"Error: {e}")
else:
    print("Model working!")
```
### Performance Optimization
```python
# Async for high throughput
import asyncio

import httpx
from litellm import acompletion


async def batch_requests(prompts):
    """Run one "gpt-3.5-turbo" completion per prompt concurrently.

    Args:
        prompts: Iterable of user prompt strings.

    Returns:
        The list of responses, in the same order as ``prompts``.
    """
    tasks = [
        acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )
        for prompt in prompts
    ]
    return await asyncio.gather(*tasks)


# Connection pooling for proxy
client = httpx.AsyncClient(
    limits=httpx.Limits(max_connections=100, max_keepalive_connections=20)
)
```
Install
curl -s https://skills.skynet.ceo/api/skills/litellm/skill.md