# helm/llama-cpp/values.yaml

# yaml-language-server: $schema=./values.schema.json

# Default values for llama-cpp.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# NOTE(review): presumably the number of pod replicas rendered into the
# Deployment - confirm against the chart templates. storage.accessModes is
# ReadWriteOnce, so raising this may need checking against the "models" claim.
replicaCount: 1

image:
  # Registry path of the llama.cpp image.
  # To use a locally built image instead: repository: "local/llama.cpp"
  repository: "ghcr.io/ggerganov/llama.cpp"
  # Pinned image tag; overrides the default, which is the chart appVersion.
  tag: "server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640"
  pullPolicy: IfNotPresent

llama:
  # Options for llama.cpp, keyed by option name. Every value is quoted so
  # that numeric-looking settings stay strings.
  # NOTE(review): presumably rendered as command-line flags by the chart
  # templates - confirm.
  args:
    # Hugging Face repository and file name of the model.
    hf-repo: "lmstudio-ai/gemma-2b-it-GGUF"
    hf-file: "gemma-2b-it-q4_k_m.gguf"
    # Model path; lies under the "/models" mountPath set in volumeMounts.
    model: "/models/gemma-2b-it-q4_k_m.gguf"
    n-predict: "-1"
    parallel: "2"

# Persistent storage settings: requested size, storage class and access modes.
# NOTE(review): presumably consumed by a PersistentVolumeClaim template (the
# "models" claim referenced under volumes) - confirm.
storage:
  size: 24Gi
  storageClass: "local-path"
  accessModes:
    - ReadWriteOnce

# Image pull secrets; empty list by default.
# NOTE(review): presumably passed through to the pod spec - confirm.
imagePullSecrets: []
# Name overrides; empty strings by default.
# NOTE(review): presumably consumed by the chart's naming helpers - confirm.
nameOverride: ""
fullnameOverride: ""

# Extra annotations and labels; both are empty mappings by default.
# NOTE(review): presumably applied to the pod template - confirm.
podAnnotations: {}
podLabels: {}

# Security context settings; both are empty mappings by default.
# NOTE(review): presumably podSecurityContext is applied at pod level and
# securityContext at container level - confirm against the templates.
# To use an example below, uncomment it and remove the "{}" after the key.
podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  # Port number for the Service.
  port: 80
  # Kubernetes Service type.
  type: ClusterIP

# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: "traefik"
  # Explicit empty mapping: a bare "annotations:" parses as null, not as a map.
  annotations: {}
  hosts:
    - host: llama.cluster.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # No TLS entries by default. Example entry:
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # No default resources are specified; this is left as a conscious choice for
  # the user. It also increases the chances that the chart runs in environments
  # with few resources, such as Minikube. To specify resources, uncomment the
  # following lines, adjust them as necessary, and remove the curly braces
  # after 'resources:'.
  # NOTE(review): the figures below look like generic placeholders and are
  # presumably far too small for serving a model - size them to the workload.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Liveness check: HTTP GET /health on the port named "http", with the first
# probe delayed by 900 seconds.
# NOTE(review): the long delay presumably covers model download/load time -
# confirm; a startupProbe may fit better if the templates support one.
livenessProbe:
  initialDelaySeconds: 900
  httpGet:
    port: http
    path: /health
# Readiness check: same endpoint, with no initial delay configured.
readinessProbe:
  httpGet:
    port: http
    path: /health

# Additional volumes on the output Deployment definition.
# The single entry, "models", is backed by the PersistentVolumeClaim "models".
volumes:
  - name: "models"
    persistentVolumeClaim:
      claimName: "models"

# Additional volumeMounts on the output Deployment definition.
# Mounts the "models" volume at /models, the directory that llama.args.model
# points into.
volumeMounts:
  - name: "models"
    mountPath: "/models"


# Scheduling constraints; all empty by default.
# NOTE(review): presumably copied into the pod spec by the templates - confirm.
nodeSelector: {}

tolerations: []

affinity: {}
新增 AI 部署方案，备用 2025-01-28 23:30:49 +00:00			`# yaml-language-server: $schema=./values.schema.json`

			`# Default values for llama-cpp.`
			`# This is a YAML-formatted file.`
			`# Declare variables to be passed into your templates.`

			`replicaCount: 1`

			`image:`
			`# repository: "local/llama.cpp"`
			`repository: "ghcr.io/ggerganov/llama.cpp"`
			`pullPolicy: IfNotPresent`
			`# Overrides the image tag whose default is the chart appVersion.`
			`tag: "server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640"`

			`llama:`
			`args:`
			`model: "/models/gemma-2b-it-q4_k_m.gguf"`
			`hf-repo: "lmstudio-ai/gemma-2b-it-GGUF"`
			`hf-file: "gemma-2b-it-q4_k_m.gguf"`
			`n-predict: "-1"`
			`parallel: "2"`

			`storage:`
			`size: 24Gi`
			`storageClass: "local-path"`
update llama-cpp helm 2025-02-07 05:06:50 +00:00			`accessModes:`
			`- ReadWriteOnce`

新增 AI 部署方案，备用 2025-01-28 23:30:49 +00:00			`imagePullSecrets: []`
			`nameOverride: ""`
			`fullnameOverride: ""`

			`podAnnotations: {}`
			`podLabels: {}`

			`podSecurityContext: {}`
			`# fsGroup: 2000`

			`securityContext: {}`
			`# capabilities:`
			`# drop:`
			`# - ALL`
			`# readOnlyRootFilesystem: true`
			`# runAsNonRoot: true`
			`# runAsUser: 1000`

			`service:`
			`type: ClusterIP`
			`port: 80`

			`ingress:`
			`enabled: false`
			`className: "traefik"`
			`annotations:`
			`hosts:`
			`- host: llama.cluster.local`
			`paths:`
			`- path: /`
			`pathType: ImplementationSpecific`
			`tls: []`
			`# - secretName: chart-example-tls`
			`# hosts:`
			`# - chart-example.local`

			`resources: {}`
			`# We usually recommend not to specify default resources and to leave this as a conscious`
			`# choice for the user. This also increases chances charts run on environments with little`
			`# resources, such as Minikube. If you do want to specify resources, uncomment the following`
			`# lines, adjust them as necessary, and remove the curly braces after 'resources:'.`
			`# limits:`
			`# cpu: 100m`
			`# memory: 128Mi`
			`# requests:`
			`# cpu: 100m`
			`# memory: 128Mi`

			`livenessProbe:`
			`httpGet:`
			`path: /health`
			`port: http`
			`initialDelaySeconds: 900`
			`readinessProbe:`
			`httpGet:`
			`path: /health`
			`port: http`

			`# Additional volumes on the output Deployment definition.`
			`volumes:`
			`- name: "models"`
			`persistentVolumeClaim:`
			`claimName: "models"`

			`# Additional volumeMounts on the output Deployment definition.`
			`volumeMounts:`
			`- name: "models"`
			`mountPath: "/models"`


			`nodeSelector: {}`

			`tolerations: []`

			`affinity: {}`