新增 AI 部署方案,备用
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
# yaml-language-server: $schema=./values.schema.json
|
||||
|
||||
# Default values for llama-cpp.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
|
||||
|
||||
# Replica count handed to the chart templates; presumably the number of
# server pods the Deployment runs — confirm against the templates.
replicaCount: 1
|
||||
|
||||
# Container image used for the server.
image:
  # Alternative: a locally built image.
  # repository: "local/llama.cpp"
  repository: "ghcr.io/ggerganov/llama.cpp"
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  # The tag below appears to pin one specific server build (it embeds a
  # 40-character hex id) — confirm before bumping.
  tag: "server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640"
|
||||
|
||||
# llama.cpp settings.
llama:
  # Presumably each key is rendered as a `--<key> <value>` server flag —
  # confirm against the templates. Values are quoted so that number-like
  # entries ("-1", "2") stay strings.
  args:
    # Path of the model file; it lies under the /models volume mount.
    model: "/models/gemma-2b-it-q4_k_m.gguf"
    # Hugging Face repository and file name of the model.
    hf-repo: "lmstudio-ai/gemma-2b-it-GGUF"
    hf-file: "gemma-2b-it-q4_k_m.gguf"
    n-predict: "-1"
    parallel: "2"
|
||||
|
||||
# Storage request; presumably backs the "models" volume claim — confirm
# against the templates.
storage:
  size: 24Gi
  storageClass: "local-path"
|
||||
# Image pull secrets; none by default.
imagePullSecrets: []
# Name overrides; empty strings by default.
nameOverride: ""
fullnameOverride: ""
|
||||
|
||||
# Extra pod annotations and labels; both empty by default.
podAnnotations: {}
podLabels: {}
|
||||
|
||||
# Pod security context; empty by default. To set one, uncomment the
# example below and remove the `{}`.
podSecurityContext: {}
  # fsGroup: 2000
|
||||
|
||||
# Security context (presumably applied at container level — confirm against
# the templates); empty by default. The commented keys are an example
# profile: uncomment them and remove the `{}` to use it.
securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
|
||||
|
||||
# Service settings: a ClusterIP service on port 80.
service:
  type: ClusterIP
  port: 80
|
||||
|
||||
# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: "traefik"
  # Explicit empty map instead of a bare `annotations:` key, which YAML
  # parses as null rather than as an empty mapping.
  annotations: {}
  hosts:
    - host: llama.cluster.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # No TLS by default; the commented entry shows the expected shape.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
|
||||
|
||||
# No default resource requests or limits are specified; this is left as a
# conscious choice for the user and improves the chances that the chart runs
# on environments with little resources, such as Minikube. To specify
# resources, uncomment the lines below, adjust them as necessary, and remove
# the curly braces after `resources:`.
resources: {}
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi
|
||||
|
||||
# Liveness probe: HTTP GET /health on the port named `http`.
livenessProbe:
  httpGet:
    path: /health
    port: http
  # 900 s = 15 minutes before the first check; presumably allows time for
  # the model to be fetched and loaded on first start — confirm.
  initialDelaySeconds: 900
|
||||
# Readiness probe: HTTP GET /health on the port named `http`; no initial
# delay is configured here.
readinessProbe:
  httpGet:
    path: /health
    port: http
|
||||
|
||||
# Additional volumes on the output Deployment definition.
# Declares a "models" volume backed by the PersistentVolumeClaim "models".
volumes:
  - name: "models"
    persistentVolumeClaim:
      claimName: "models"
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
# Mounts the "models" volume at /models.
volumeMounts:
  - name: "models"
    mountPath: "/models"
|
||||
|
||||
|
||||
# Scheduling settings; all empty by default.
nodeSelector: {}

tolerations: []

affinity: {}
|
||||
Reference in New Issue
Block a user