diff --git a/litellm/litellm.yaml b/litellm/litellm.yaml
index 7abd261..736637b 100644
--- a/litellm/litellm.yaml
+++ b/litellm/litellm.yaml
@@ -4,19 +4,51 @@ metadata:
   name: litellm-config-file
 data:
   config.yaml: |
-    model_list:
-      - model_name: gpt-4.1-mini
-        litellm_params:
-          model: openai/gpt-4.1-mini
-          api_key: "os.environ/OPENAI_API_KEY"
-      - model_name: qwen3:32b
-        litellm_params:
-          model: ollama/qwen3:32b
-          api_base: "http://10.88.88.236:11434"
-      - model_name: gemma3:27b
-        litellm_params:
-          model: ollama/gemma3:27b
-          api_base: "http://10.88.88.236:11434"
+    model_list:
+      - model_name: gpt-5-mini
+        litellm_params:
+          model: openai/gpt-5-mini-2025-08-07
+          api_key: "os.environ/OPENAI_API_KEY"
+      - model_name: claude-4.5-haiku
+        litellm_params:
+          model: "anthropic/claude-haiku-4-5-20251001"
+          api_key: "os.environ/ANTHROPIC_API_KEY"
+      - model_name: gemini-3-flash
+        litellm_params:
+          model: gemini/gemini-3-flash-preview
+          api_key: "os.environ/GEMINI_API_KEY"
+      - model_name: glm-4.7-flash
+        litellm_params:
+          model: ollama/glm-4.7-flash
+          api_base: "http://10.88.88.235:11434"
+    litellm_settings:
+      # set_verbose: true  # Uncomment this if you want to see verbose logs; not recommended in production
+      callbacks: ["arize_phoenix"]
+      drop_params: true
+      # max_budget: 100
+      # budget_duration: 30d
+      num_retries: 5
+      request_timeout: 600
+      telemetry: false
+    general_settings:
+      # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
+      # Resolved from the environment so the key is not committed to the repo;
+      # the Deployment must inject LITELLM_MASTER_KEY (e.g. from a Secret).
+      master_key: "os.environ/LITELLM_MASTER_KEY"
+      store_model_in_db: true
+      proxy_budget_rescheduler_min_time: 60
+      proxy_budget_rescheduler_max_time: 64
+      proxy_batch_write_at: 1
+      database_connection_pool_limit: 10
+      # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"  # [OPTIONAL] use for token-based auth to proxy
+    environment_variables:
+      # settings for using redis caching
+      # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
+      # REDIS_PORT: "16337"
+      # REDIS_PASSWORD:
+      # Collector endpoints for the arize_phoenix callback listed under litellm_settings.
+      PHOENIX_COLLECTOR_ENDPOINT: "http://phoenix:4317"
+      PHOENIX_COLLECTOR_HTTP_ENDPOINT: "http://phoenix:6006/v1/traces"
 ---
 apiVersion: apps/v1
 kind: Deployment