# Kubernetes manifests for the LiteLLM proxy: ConfigMap, Deployment, Service.
---
# ConfigMap carrying the LiteLLM proxy configuration under the `config.yaml`
# key. The Deployment in this file mounts that key at
# /app/proxy_server_config.yaml and passes the path to the container through
# its `--config` argument.
#
# NOTE(review): `general_settings.master_key` is a literal value stored in
# plain text in this ConfigMap. The provider API keys below already use the
# `os.environ/<NAME>` form; consider doing the same for the master key, after
# confirming the variable is present in the Secret the Deployment references.
apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-config-file
data:
  config.yaml: |
    model_list:
      - model_name: gpt-5-mini
        litellm_params:
          model: openai/gpt-5-mini-2025-08-07
          api_key: "os.environ/OPENAI_API_KEY"
      - model_name: claude-4.5-haiku
        litellm_params:
          model: "anthropic/claude-haiku-4-5-20251001"
          api_key: "os.environ/ANTHROPIC_API_KEY"
      - model_name: gemini-3-flash
        litellm_params:
          model: gemini/gemini-3-flash-preview
          api_key: "os.environ/GEMINI_API_KEY"
      - model_name: glm-4.7-flash
        litellm_params:
          model: ollama/glm-4.7-flash
          api_base: http://10.88.88.235:11434
    litellm_settings:
      # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
      callbacks: ["arize_phoenix"]
      drop_params: True
      # max_budget: 100
      # budget_duration: 30d
      num_retries: 5
      request_timeout: 600
      telemetry: False
    general_settings:
      master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
      store_model_in_db: True
      proxy_budget_rescheduler_min_time: 60
      proxy_budget_rescheduler_max_time: 64
      proxy_batch_write_at: 1
      database_connection_pool_limit: 10
      # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
    environment_variables:
      # settings for using redis caching
      # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
      # REDIS_PORT: "16337"
      # REDIS_PASSWORD:
      PHOENIX_COLLECTOR_ENDPOINT: "http://phoenix:4317"
      PHOENIX_COLLECTOR_HTTP_ENDPOINT: "http://phoenix:6006/v1/traces"
---
# Deployment for the LiteLLM proxy container.
# - The `config.yaml` key of the `litellm-config-file` ConfigMap is mounted as
#   a single file (via subPath) at /app/proxy_server_config.yaml, and the same
#   path is handed to the container with `--config`.
# - Environment comes from the `litellm-secrets` Secret (envFrom) plus the
#   explicit `env` entries listed below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
  labels:
    app: litellm
spec:
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm
          # it is recommended to fix a version generally
          image: ghcr.io/berriai/litellm:main-latest
          args:
            - "--config"
            - "/app/proxy_server_config.yaml"
          ports:
            - containerPort: 4000
          volumeMounts:
            - name: config-volume
              mountPath: /app/proxy_server_config.yaml
              subPath: config.yaml
          envFrom:
            - secretRef:
                name: litellm-secrets
          env:
            - name: STORE_MODEL_IN_DB
              value: "True"
            - name: OTEL_SERVICE_NAME
              value: "litellm"
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://phoenix:4317"
            - name: OTEL_EXPORTER_OTLP_PROTOCOL
              value: "grpc"
            - name: PHOENIX_COLLECTOR_ENDPOINT
              value: "http://phoenix:4317"
            - name: OTEL_RESOURCE_ATTRIBUTES
              value: "service.name=litellm,openinference.project.name=litellm"
      volumes:
        - name: config-volume
          configMap:
            name: litellm-config-file
---
# ClusterIP Service for the LiteLLM proxy: selects pods labelled
# `app: litellm` and forwards Service port 80 to container port 4000.
#
# No namespace is pinned here, matching the ConfigMap and Deployment in this
# file, so all three resources are created in whichever namespace the manifest
# is applied to. A Service only selects pods in its own namespace, so pinning
# it to `default` would leave it without endpoints (or make the apply fail)
# whenever the Deployment is created elsewhere.
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  type: ClusterIP
  selector:
    app: litellm
  ports:
    - name: http
      port: 80
      targetPort: 4000