# k3s-cluster/litellm/config.yaml

# Models served through the proxy. Each entry pairs a public alias
# (model_name) with the provider-prefixed model id in litellm_params.
# Hosted providers take their key via LiteLLM's "os.environ/NAME" reference
# form, so no provider secret is stored in this file.
model_list:
  # OpenAI
  - model_name: "gpt-5-mini"
    litellm_params:
      model: "openai/gpt-5-mini-2025-08-07"
      api_key: "os.environ/OPENAI_API_KEY"

  # Anthropic
  - model_name: "claude-4.5-haiku"
    litellm_params:
      model: "anthropic/claude-haiku-4-5-20251001"
      api_key: "os.environ/ANTHROPIC_API_KEY"

  # Google Gemini
  - model_name: "gemini-3-flash"
    litellm_params:
      model: "gemini/gemini-3-flash-preview"
      api_key: "os.environ/GEMINI_API_KEY"

  # Ollama: addressed by api_base; no api_key is configured for this entry.
  - model_name: "glm-4.7-flash"
    litellm_params:
      model: "ollama/glm-4.7-flash"
      api_base: "http://10.88.88.235:11434"

# Library-level settings applied to requests routed through the proxy.
litellm_settings:
  # Uncomment for verbose logs; not recommended in production.
  # set_verbose: true
  callbacks:
    - "arize_phoenix"
  drop_params: true
  # Budget limits are left disabled.
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  # NOTE(review): presumably seconds; confirm against the LiteLLM docs.
  request_timeout: 600
  telemetry: false

# Proxy-server settings: authentication, database usage and budget
# rescheduling.
general_settings:
  # [OPTIONAL] Use to enforce auth on proxy.
  # See - https://docs.litellm.ai/docs/proxy/virtual_keys
  # The key is read from the environment instead of being committed here,
  # matching how the provider API keys in this file are supplied.
  # LITELLM_MASTER_KEY must be set in the proxy's environment (e.g. from a
  # Kubernetes Secret) before deploying this change.
  # NOTE(review): verify the deployment sets it; if the variable is missing
  # the proxy may come up without a master key.
  master_key: "os.environ/LITELLM_MASTER_KEY"
  store_model_in_db: true
  proxy_budget_rescheduler_min_time: 60
  proxy_budget_rescheduler_max_time: 64
  proxy_batch_write_at: 1
  database_connection_pool_limit: 10
  # [OPTIONAL] use for token-based auth to proxy
  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"

# Environment variables declared through the config file.
environment_variables:
  # settings for using redis caching
  # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
  # REDIS_PORT: "16337"
  # REDIS_PASSWORD:
  # Phoenix trace-collector endpoints, presumably consumed by the
  # "arize_phoenix" callback listed under litellm_settings.
  # NOTE(review): both a port-4317 endpoint and an HTTP /v1/traces endpoint
  # are set; confirm which one the callback actually uses and drop the other
  # if it is redundant.
  PHOENIX_COLLECTOR_ENDPOINT: "http://phoenix:4317"
  PHOENIX_COLLECTOR_HTTP_ENDPOINT: "http://phoenix:6006/v1/traces"