# k3s-cluster/litellm/litellm.yaml

---
# ConfigMap carrying the LiteLLM proxy configuration file.
# The Deployment in this manifest mounts the `config.yaml` key at
# /app/proxy_server_config.yaml and passes that path to the proxy via --config.
apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-config-file
data:
  config.yaml: |
    # Models served by the proxy. `model_name` is the alias clients request;
    # `litellm_params.model` is the `<provider>/<model>` that LiteLLM calls.
    # `os.environ/<VAR>` values are read from the container environment at
    # runtime (presumably supplied by the `litellm-secrets` Secret - verify).
    model_list:
      - model_name: gpt-5-mini
        litellm_params:
          model: openai/gpt-5-mini-2025-08-07
          api_key: "os.environ/OPENAI_API_KEY"
      - model_name: claude-4.5-haiku
        litellm_params:
          model: anthropic/claude-haiku-4-5-20251001
          api_key: "os.environ/ANTHROPIC_API_KEY"
      - model_name: gemini-3-flash
        litellm_params:
          model: gemini/gemini-3-flash-preview
          api_key: "os.environ/GEMINI_API_KEY"
      # Ollama model reached at a fixed IP address; no API key is configured.
      - model_name: glm-4.7-flash
        litellm_params:
          model: ollama/glm-4.7-flash
          api_base: http://10.88.88.235:11434
    litellm_settings:
      # set_verbose: true  # Uncomment to see verbose logs; not recommended in production
      # Send traces to Arize Phoenix (endpoints are set under environment_variables).
      callbacks: ["arize_phoenix"]
      drop_params: true
      # max_budget: 100
      # budget_duration: 30d
      num_retries: 5
      request_timeout: 600
      telemetry: false
    general_settings:
      # Master key used to enforce auth on the proxy.
      # See https://docs.litellm.ai/docs/proxy/virtual_keys
      # NOTE(review): this is a placeholder committed in plain text inside a
      # ConfigMap. Replace it before exposing the proxy, ideally by storing the
      # key in a Secret and referencing it as `os.environ/LITELLM_MASTER_KEY`.
      master_key: sk-1234
      # NOTE(review): no database_url is set in this file; presumably
      # DATABASE_URL is provided through the container environment - confirm.
      store_model_in_db: true
      proxy_budget_rescheduler_min_time: 60
      proxy_budget_rescheduler_max_time: 64
      proxy_batch_write_at: 1
      database_connection_pool_limit: 10
      # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
    environment_variables:
      # settings for using redis caching
      # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
      # REDIS_PORT: "16337"
      # REDIS_PASSWORD:
      # Phoenix collector endpoints. PHOENIX_COLLECTOR_ENDPOINT is also set,
      # with the same value, in the Deployment's `env` list.
      PHOENIX_COLLECTOR_ENDPOINT: "http://phoenix:4317"
      PHOENIX_COLLECTOR_HTTP_ENDPOINT: "http://phoenix:6006/v1/traces"
---
# Runs the LiteLLM proxy container, configured from the `litellm-config-file`
# ConfigMap and the `litellm-secrets` Secret. `replicas` is not set, so the
# Kubernetes default of one pod applies.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
  labels:
    app: litellm
spec:
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm
          # NOTE(review): `main-latest` is a mutable tag; pinning a released
          # version (or an image digest) is generally recommended.
          image: ghcr.io/berriai/litellm:main-latest
          args:
            - "--config"
            - "/app/proxy_server_config.yaml"
          ports:
            - containerPort: 4000
          # Keep the pod out of the Service endpoints until the proxy answers
          # on LiteLLM's readiness endpoint.
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 3
          # Restart the container if the proxy stops responding. The initial
          # delay leaves room for a slow start before the first check.
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 3
          volumeMounts:
            # Mounts only the `config.yaml` key as a single file. subPath
            # mounts are not refreshed when the ConfigMap changes, so restart
            # the pods after editing the configuration.
            - name: config-volume
              mountPath: /app/proxy_server_config.yaml
              subPath: config.yaml
          # Every key of the Secret becomes an environment variable.
          envFrom:
            - secretRef:
                name: litellm-secrets
          env:
            - name: STORE_MODEL_IN_DB
              value: "True"
            # OpenTelemetry export settings pointing at the `phoenix` service.
            - name: OTEL_SERVICE_NAME
              value: litellm
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://phoenix:4317"
            - name: OTEL_EXPORTER_OTLP_PROTOCOL
              value: "grpc"
            - name: PHOENIX_COLLECTOR_ENDPOINT
              value: "http://phoenix:4317"
            - name: OTEL_RESOURCE_ATTRIBUTES
              value: "service.name=litellm,openinference.project.name=litellm"
      volumes:
        - name: config-volume
          configMap:
            name: litellm-config-file
---
# Cluster-internal Service for the LiteLLM proxy: forwards port 80 to
# container port 4000 on pods labelled `app: litellm`.
# No namespace is pinned here, matching the ConfigMap and Deployment in this
# manifest, so that all three resources are created in the same namespace
# (the one selected at apply time). A Service can only select pods in its
# own namespace.
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  type: ClusterIP
  selector:
    app: litellm
  ports:
    - name: http
      port: 80
      targetPort: 4000