Add AI deployment solution (as a backup)

This commit is contained in:
Sense T
2025-01-29 07:30:49 +08:00
parent 322b0781ac
commit 483ddad461
29 changed files with 2160 additions and 1 deletion

23
llama-cpp/.helmignore Normal file

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

24
llama-cpp/Chart.yaml Normal file

@@ -0,0 +1,24 @@
apiVersion: v2
name: llama-cpp
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

22
llama-cpp/README.md Normal file

@@ -0,0 +1,22 @@
# TuringPi Llama.cpp Chart
Deploys [Llama.cpp server](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) onto your TuringPi
cluster, complete with a persistent volume to store the model files, replication, and an ingress. Assumes you have
followed the instructions at [docs.turingpi.com](https://docs.turingpi.com/docs/how-to-plan-kubernetes-installation) to
configure Longhorn, MetalLB, and Traefik. By default it uses the `lmstudio-ai/gemma-2b-it-GGUF` model, but this can be
overridden with custom values (see the example under Installation).
## Installation
```shell
helm install llama-cpp https://elepedus.github.io/llama-cpp/llama-cpp-0.0.1.tgz --namespace=llama-cpp
```
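The default model can be swapped out by overriding the chart's `llama.args` values. A minimal sketch, using a hypothetical `my-values.yaml` whose repository and file names are placeholders:
```yaml
# my-values.yaml -- hypothetical override; repo/file names are placeholders
llama:
  args:
    hf-repo: "example-org/example-model-GGUF"
    hf-file: "example-model-q4_k_m.gguf"
    model: "/models/example-model-q4_k_m.gguf"
```
```shell
helm install llama-cpp https://elepedus.github.io/llama-cpp/llama-cpp-0.0.1.tgz \
  --namespace=llama-cpp -f my-values.yaml
```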
## Usage
By default, the ingress exposes the web UI at `llama.cluster.local`, at the same IP address as you configured
for `cluster.local`. Make sure to update your `/etc/hosts` file so the new subdomain is accessible:
```
10.0.0.70 turing-cluster turing-cluster.local llama.cluster llama.cluster.local
```
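With the hostname resolving (and `ingress.enabled` set to `true` in your values), you can query the server directly. A sketch against the llama.cpp server's `/completion` endpoint; the prompt and token count are arbitrary:
```shell
curl http://llama.cluster.local/completion \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Building a website can be done in 10 simple steps:", "n_predict": 64}'
```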

22
llama-cpp/templates/NOTES.txt Normal file

@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llama-cpp.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llama-cpp.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llama-cpp.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "llama-cpp.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

62
llama-cpp/templates/_helpers.tpl Normal file

@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "llama-cpp.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "llama-cpp.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "llama-cpp.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "llama-cpp.labels" -}}
helm.sh/chart: {{ include "llama-cpp.chart" . }}
{{ include "llama-cpp.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "llama-cpp.selectorLabels" -}}
app.kubernetes.io/name: {{ include "llama-cpp.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "llama-cpp.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "llama-cpp.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

80
llama-cpp/templates/deployment.yaml Normal file

@@ -0,0 +1,80 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llama-cpp.fullname" . }}
  labels:
    {{- include "llama-cpp.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "llama-cpp.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "llama-cpp.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: {{ printf "%s:%s" .Values.image.repository (.Values.image.tag | default .Chart.AppVersion) }}
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command: ["/server"]
          args:
            - "--port"
            - {{ .Values.service.port | quote }}
            - "--host"
            - "0.0.0.0"
            {{- range $key, $value := .Values.llama.args }}
            {{- if $value }}
            - "--{{ $key }}"
            - "{{ $value }}"
            {{- else }}
            - "--{{ $key }}"
            {{- end }}
            {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
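For reference, the `{{- range $key, $value := .Values.llama.args }}` loop above flattens the `llama.args` map into CLI flags (Go templates iterate maps in sorted key order). With the chart's default values it renders roughly this container invocation, shown as a sketch:
```shell
/server --port 80 --host 0.0.0.0 \
  --hf-file gemma-2b-it-q4_k_m.gguf --hf-repo lmstudio-ai/gemma-2b-it-GGUF \
  --model /models/gemma-2b-it-q4_k_m.gguf --n-predict -1 --parallel 2
```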

61
llama-cpp/templates/ingress.yaml Normal file

@@ -0,0 +1,61 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "llama-cpp.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "llama-cpp.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}

11
llama-cpp/templates/pvc.yaml Normal file

@@ -0,0 +1,11 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.storage.size }}
  storageClassName: {{ .Values.storage.storageClass }}

13
llama-cpp/templates/service.yaml Normal file

@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "llama-cpp.fullname" . }}
  labels:
    {{- include "llama-cpp.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "llama-cpp.selectorLabels" . | nindent 4 }}

15
llama-cpp/templates/tests/test-connection.yaml Normal file

@@ -0,0 +1,15 @@
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "llama-cpp.fullname" . }}-test-connection"
  labels:
    {{- include "llama-cpp.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "llama-cpp.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never
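Because this pod is annotated with `"helm.sh/hook": test`, it is not created at install time; it runs only when the test hook is invoked, e.g. with the release name and namespace used in the README:
```shell
helm test llama-cpp --namespace llama-cpp
```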

213
llama-cpp/values.schema.json Normal file

@@ -0,0 +1,213 @@
{
  "$schema": "http://json-schema.org/schema#",
  "type": "object",
  "properties": {
    "affinity": {
      "type": "object"
    },
    "fullnameOverride": {
      "type": "string"
    },
    "image": {
      "type": "object",
      "properties": {
        "pullPolicy": {
          "type": "string"
        },
        "repository": {
          "type": "string"
        },
        "tag": {
          "type": "string"
        }
      }
    },
    "imagePullSecrets": {
      "type": "array"
    },
    "ingress": {
      "type": "object",
      "properties": {
        "annotations": {
          "type": "null"
        },
        "className": {
          "type": "string"
        },
        "enabled": {
          "type": "boolean"
        },
        "hosts": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "host": {
                "type": "string"
              },
              "paths": {
                "type": "array",
                "items": {
                  "type": "object",
                  "properties": {
                    "path": {
                      "type": "string"
                    },
                    "pathType": {
                      "type": "string"
                    }
                  }
                }
              }
            }
          }
        },
        "tls": {
          "type": "array"
        }
      }
    },
    "livenessProbe": {
      "type": "object",
      "properties": {
        "httpGet": {
          "type": "object",
          "properties": {
            "path": {
              "type": "string"
            },
            "port": {
              "type": "string"
            }
          }
        },
        "initialDelaySeconds": {
          "type": "integer"
        }
      }
    },
    "llama": {
      "type": "object",
      "properties": {
        "args": {
          "type": "object",
          "properties": {
            "hf-file": {
              "type": "string"
            },
            "hf-repo": {
              "type": "string"
            },
            "model": {
              "type": "string"
            },
            "n-predict": {
              "type": "string"
            },
            "parallel": {
              "type": "string"
            }
          }
        }
      }
    },
    "nameOverride": {
      "type": "string"
    },
    "nodeSelector": {
      "type": "object"
    },
    "podAnnotations": {
      "type": "object"
    },
    "podLabels": {
      "type": "object"
    },
    "podSecurityContext": {
      "type": "object"
    },
    "readinessProbe": {
      "type": "object",
      "properties": {
        "httpGet": {
          "type": "object",
          "properties": {
            "path": {
              "type": "string"
            },
            "port": {
              "type": "string"
            }
          }
        }
      }
    },
    "replicaCount": {
      "type": "integer"
    },
    "resources": {
      "type": "object"
    },
    "securityContext": {
      "type": "object"
    },
    "service": {
      "type": "object",
      "properties": {
        "port": {
          "type": "integer"
        },
        "type": {
          "type": "string"
        }
      }
    },
    "storage": {
      "type": "object",
      "properties": {
        "size": {
          "type": "string"
        },
        "storageClass": {
          "type": "string"
        }
      }
    },
    "tolerations": {
      "type": "array"
    },
    "volumeMounts": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "mountPath": {
            "type": "string"
          },
          "name": {
            "type": "string"
          }
        }
      }
    },
    "volumes": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string"
          },
          "persistentVolumeClaim": {
            "type": "object",
            "properties": {
              "claimName": {
                "type": "string"
              }
            }
          }
        }
      }
    }
  }
}

101
llama-cpp/values.yaml Normal file

@@ -0,0 +1,101 @@
# yaml-language-server: $schema=./values.schema.json

# Default values for llama-cpp.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
  # repository: "local/llama.cpp"
  repository: "ghcr.io/ggerganov/llama.cpp"
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "server-60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640"

llama:
  args:
    model: "/models/gemma-2b-it-q4_k_m.gguf"
    hf-repo: "lmstudio-ai/gemma-2b-it-GGUF"
    hf-file: "gemma-2b-it-q4_k_m.gguf"
    n-predict: "-1"
    parallel: "2"

storage:
  size: 24Gi
  storageClass: "local-path"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  port: 80

ingress:
  enabled: false
  className: "traefik"
  annotations:
  hosts:
    - host: llama.cluster.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

livenessProbe:
  httpGet:
    path: /health
    port: http
  initialDelaySeconds: 900
readinessProbe:
  httpGet:
    path: /health
    port: http

# Additional volumes on the output Deployment definition.
volumes:
  - name: "models"
    persistentVolumeClaim:
      claimName: "models"

# Additional volumeMounts on the output Deployment definition.
volumeMounts:
  - name: "models"
    mountPath: "/models"

nodeSelector: {}

tolerations: []

affinity: {}