helm/llama-cpp/values.yaml

105 lines
2.2 KiB
YAML
Raw Permalink Normal View History

2025-01-28 23:30:49 +00:00
# yaml-language-server: $schema=./values.schema.json
# Default values for llama-cpp.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Replica count handed to the chart's templates (presumably the Deployment's
# `replicas` field — confirm against the templates).
replicaCount: 1
# Container image settings: repository, pull policy and tag.
image:
  # repository: "local/llama.cpp"
  repository: "ghcr.io/ggerganov/llama.cpp"
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640"
# llama.cpp settings. The keys under `args` carry llama.cpp option names
# (presumably rendered as `--<key> <value>` by the chart's templates — confirm).
# Numeric-looking values are quoted so they stay strings.
llama:
  args:
    model: "/models/gemma-2b-it-q4_k_m.gguf"
    hf-repo: "lmstudio-ai/gemma-2b-it-GGUF"
    hf-file: "gemma-2b-it-q4_k_m.gguf"
    n-predict: "-1"
    parallel: "2"
# Persistent storage request: size, storage class and access modes.
# NOTE(review): kept as a top-level key; if the chart's templates read it from
# under `llama`, indent this block accordingly — confirm against the templates.
storage:
  size: 24Gi
  storageClass: "local-path"
  accessModes:
    - ReadWriteOnce
# Image pull secrets; empty list by default.
imagePullSecrets: []
# Name overrides; empty strings by default (presumably leaving the chart's
# generated names in place — confirm against the chart's helpers).
nameOverride: ""
fullnameOverride: ""
# Extra pod annotations and labels; empty mappings by default.
podAnnotations: {}
podLabels: {}
# Pod-level security context; empty by default. Example field below.
podSecurityContext: {}
  # fsGroup: 2000
# Container-level security context; empty by default. Example fields below.
securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
# Service type and port.
service:
  type: ClusterIP
  port: 80
# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: "traefik"
  # Explicit empty mapping: a bare `annotations:` would be read as null.
  annotations: {}
  hosts:
    - host: llama.cluster.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
# Container resource requests/limits; intentionally empty by default.
resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi
# Liveness probe: HTTP GET on /health at the port named "http".
# The first probe is delayed by 900 seconds (presumably to leave time for the
# model download/load — confirm).
livenessProbe:
  httpGet:
    path: /health
    port: http
  initialDelaySeconds: 900
# Readiness probe: HTTP GET on /health at the port named "http".
readinessProbe:
  httpGet:
    path: /health
    port: http
# Additional volumes on the output Deployment definition.
# Declares one volume, "models", backed by the PersistentVolumeClaim "models".
volumes:
  - name: "models"
    persistentVolumeClaim:
      claimName: "models"
# Additional volumeMounts on the output Deployment definition.
# Mounts the volume named "models" at /models.
volumeMounts:
  - name: "models"
    mountPath: "/models"
# Pod scheduling constraints; all empty by default.
nodeSelector: {}
tolerations: []
affinity: {}