# helm/llama-cpp/values.yaml

# yaml-language-server: $schema=./values.schema.json

# Default values for llama-cpp.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Number of replicas for the chart's workload.
# NOTE(review): presumably rendered as the Deployment's spec.replicas;
# confirm against the templates.
replicaCount: 1

# Container image settings for the llama.cpp server.
image:
  # Alternative repository for a locally built image:
  #   repository: 'local/llama.cpp'
  repository: 'ghcr.io/ggerganov/llama.cpp'
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  # Kept quoted so the tag is always read as a string.
  tag: 'server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640'

# llama.cpp settings. Every value under `args` is a quoted string.
# NOTE(review): the keys look like llama.cpp server command-line option
# names; confirm how the templates turn them into flags.
llama:
  args:
    # Model file path; it sits under the '/models' mountPath that
    # `volumeMounts` declares in this file.
    model: '/models/gemma-2b-it-q4_k_m.gguf'
    # Hugging Face repository and file name for the same model.
    # NOTE(review): presumably used to fetch the model when it is not
    # already present at `model`; confirm.
    hf-repo: 'lmstudio-ai/gemma-2b-it-GGUF'
    hf-file: 'gemma-2b-it-q4_k_m.gguf'
    # Numeric-looking values are deliberately kept as strings.
    n-predict: '-1'
    parallel: '2'
# Storage request: size, storage class and access modes.
# NOTE(review): presumably backs the "models" PersistentVolumeClaim that
# `volumes` references; confirm against the templates.
storage:
  size: 24Gi
  storageClass: 'local-path'
  accessModes:
    - ReadWriteOnce

# Image pull secrets and chart name overrides; all empty by default.
imagePullSecrets: []
nameOverride: ''
fullnameOverride: ''

# Extra annotations and labels; both empty by default.
# NOTE(review): presumably applied to the pod template; confirm against
# the templates.
podAnnotations: {}
podLabels: {}

# Security context for the pod; empty by default. To set one, replace the
# `{}` with a mapping such as the commented example below.
podSecurityContext: {}
  # fsGroup: 2000

# Security context for the container; empty by default. To set one, replace
# the `{}` with a mapping such as the commented example below.
securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# Service settings: a ClusterIP service on port 80.
# NOTE(review): how this port maps to the container port is defined in the
# templates, which are not visible here.
service:
  type: ClusterIP
  port: 80

# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: "traefik"
  # Explicit empty mapping: a bare `annotations:` parses as null rather than
  # as a map, which is ambiguous and can fail object-typed schema validation.
  annotations: {}
  hosts:
    - host: llama.cluster.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # No TLS entries by default. Example entry:
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Container resource requests and limits; none are set by default.
resources: {}
  # We usually recommend not specifying default resources and leaving this as
  # a conscious choice for the user. This also increases the chances that the
  # chart runs on environments with few resources, such as Minikube. If you do
  # want to specify resources, uncomment the following lines, adjust them as
  # necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Liveness probe: HTTP GET on /health against the port named "http".
livenessProbe:
  httpGet:
    path: /health
    port: http
  # Delays the first liveness check by 900 seconds (15 minutes).
  # NOTE(review): presumably leaves time for the model to be fetched and
  # loaded on first start; confirm.
  initialDelaySeconds: 900
# Readiness probe: same endpoint and port; no initial delay is set here.
readinessProbe:
  httpGet:
    path: /health
    port: http

# Additional volumes on the output Deployment definition.
# Declares one volume, "models", backed by the PersistentVolumeClaim "models".
# NOTE(review): the claim is presumably created by this chart from the
# `storage` values; confirm against the templates.
volumes:
  - name: "models"
    persistentVolumeClaim:
      claimName: "models"

# Additional volumeMounts on the output Deployment definition.
# Mounts the "models" volume at /models, the directory that
# `llama.args.model` points into.
volumeMounts:
  - name: "models"
    mountPath: "/models"


# Pod scheduling settings: node selector, tolerations and affinity.
# All are empty by default.
nodeSelector: {}

tolerations: []

affinity: {}