7 Commits

Author SHA1 Message Date
Alexandre Ferreira
7eb7526956 Merge branch 'dev' into 'master'
Dev

See merge request arm-research/smarter/smarter-device-manager!13
2021-01-06 01:35:33 +00:00
Alexandre Ferreira
304807e48e Fix removal of sockets files on shhutdown 2021-01-05 18:43:56 -06:00
Alexandre Ferreira
879085aaed Fix for k3s >= 1.18 2020-06-24 15:16:01 -05:00
Alexandre Ferreira
43009d99cc Merge branch 'update-registry' into 'master'
Update registry with the latest image

See merge request arm-research/smarter/smarter-device-manager!12
2020-06-10 20:36:22 +00:00
Alexandre Ferreira
ee5804f7b8 Update registry with the latest image 2020-06-10 15:26:28 -05:00
Alexandre Ferreira
7a3353216d Merge branch 'k3s-1.18' into 'master'
Add tty to the configmaps and rename yaml files to correspond to uses and README

See merge request arm-research/smarter/smarter-device-manager!11
2020-06-10 16:06:27 +00:00
Alexandre Ferreira
0eab2f36e6 Add tty to the configmaps and rename yaml files to correspond to uses and README 2020-06-10 10:36:18 -05:00
16 changed files with 64 additions and 70 deletions

View File

@@ -147,3 +147,6 @@ The following command should show the node resources in a similar form as shown
kubectl describe node pike5
```
## k3s
K3s < 1.18 stores the plugin interface in a different directory than k8s and so it needs a different yaml file to enable smarter-device-manager to communicate correctly with k3s agent. So use the smart-device-manager-k3s yaml files on this reposistor for k3s < 1.18.

View File

@@ -5,6 +5,12 @@
function printHelp() {
echo $(basename $0)" options:";
echo " -A <Architectures to use> # Compiling to ${ARCHS} now, examples: linux/amd64,linux/arm/v7,linux/arm/v6,linux/arm64"
if [ ${FLAG_NOCACHE} -gt 0 ]
then
echo " -C # Do not use cache"
else
echo " -C # Use cache"
fi
if [ ${FLAG_UPLOADIMAGES} -gt 0 ]
then
echo " -U # Do not upload images - the default is upload the images to the registry"
@@ -43,8 +49,9 @@ FLAG_UPLOADMANIFEST=1
ADDITIONAL_TAG=""
ADDITIONAL_IMAGE_NAME=""
PUSH_OPTION=""
FLAG_NOCACHE=0
while getopts hA:B:MST:U name
while getopts hA:B:MST:UC name
do
case $name in
h)
@@ -52,6 +59,10 @@ do
exit 0;;
A)
ARCHS="$OPTARG";;
C)
[ ${FLAG_NOCACHE} -gt 0 ] && FLAG_NOCACHE=0;
[ ${FLAG_NOCACHE} -eq 0 ] && FLAG_NOCACHE=1;
;;
U)
[ ${FLAG_UPLOADIMAGES} -gt 0 ] && FLAG_UPLOADIMAGES=0;
[ ${FLAG_UPLOADIMAGES} -eq 0 ] && FLAG_UPLOADIMAGES=1;
@@ -93,6 +104,13 @@ EOF
fi
fi
if [ $FLAG_NOCACHE -gt 0 ]
then
CACHE_OPTION="--no-cache"
else
CACHE_OPTION=""
fi
if [ $FLAG_UPLOADIMAGES -gt 0 ]
then
PUSH_OPTION="--push"
@@ -100,6 +118,6 @@ else
PUSH_OPTION="--load"
fi
docker buildx build -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} .
docker buildx build ${CACHE_OPTION} -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} .
exit 0

17
main.go
View File

@@ -193,17 +193,17 @@ L:
}
var err error
for _, devicesInUse := range listDevicesAvailable {
switch devicesInUse.deviceType {
for id, _ := range listDevicesAvailable {
switch listDevicesAvailable[id].deviceType {
case deviceFileType :
devicesInUse.devicePluginSmarter = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
if err = devicesInUse.devicePluginSmarter.Serve(); err != nil {
listDevicesAvailable[id].devicePluginSmarter = NewSmarterDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceFile, listDevicesAvailable[id].deviceName, listDevicesAvailable[id].socketName)
if err = listDevicesAvailable[id].devicePluginSmarter.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
case nvidiaSysType :
devicesInUse.devicePluginNvidia = NewNvidiaDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceName,"NVIDIA_VISIBLE_DEVICES", devicesInUse.socketName, devicesInUse.deviceId)
if err = devicesInUse.devicePluginNvidia.Serve(); err != nil {
listDevicesAvailable[id].devicePluginNvidia = NewNvidiaDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceName,"NVIDIA_VISIBLE_DEVICES", listDevicesAvailable[id].socketName, listDevicesAvailable[id].deviceId)
if err = listDevicesAvailable[id].devicePluginNvidia.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
@@ -234,13 +234,18 @@ L:
default:
glog.V(0).Infof("Received signal \"%v\", shutting down.", s)
for _, devicesInUse := range listDevicesAvailable {
glog.V(0).Info("Stopping device ", devicesInUse.deviceName)
switch devicesInUse.deviceType {
case deviceFileType :
glog.V(0).Info("Smarter device type")
if devicesInUse.devicePluginSmarter != nil {
glog.V(0).Info("Stopping device")
devicesInUse.devicePluginSmarter.Stop()
}
case nvidiaSysType :
glog.V(0).Info("Nvidia device type")
if devicesInUse.devicePluginNvidia != nil {
glog.V(0).Info("Stopping device")
devicesInUse.devicePluginNvidia.Stop()
}
}

View File

@@ -94,6 +94,7 @@ func (m *SmarterDevicePlugin) Start() error {
// Stop the gRPC server
func (m *SmarterDevicePlugin) Stop() error {
glog.V(0).Infof("Stopping server with socket ",m.socket)
if m.server == nil {
return nil
}
@@ -101,6 +102,7 @@ func (m *SmarterDevicePlugin) Stop() error {
m.server.Stop()
m.server = nil
close(m.stop)
glog.V(0).Info("Server stopped with socket ",m.socket)
return m.cleanup()
}
@@ -179,6 +181,7 @@ func (m *SmarterDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreS
}
func (m *SmarterDevicePlugin) cleanup() error {
glog.V(0).Info("Removing file ",m.socket)
if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
return err
}

View File

@@ -15,7 +15,7 @@ spec:
nodeName: smarter-jetson-xavier-4bcc2584
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -15,7 +15,7 @@ spec:
nodeName: <replace with node to run>
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -1,43 +0,0 @@
apiVersion: v1
kind: Pod
metadata:
name: smarter-device-management
namespace: default
spec:
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
hostname: smarter-device-management
nodeName: <replace with node to run>
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
resources:
limits:
cpu: 100m
memory: 10Mi
requests:
cpu: 10m
memory: 10Mi
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- name: dev-dir
mountPath: /dev
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: dev-dir
hostPath:
path: /dev
terminationGracePeriodSeconds: 30

View File

@@ -12,10 +12,10 @@ spec:
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
hostname: smarter-device-management
nodeName: smarter-jetson-xavier-4bcc2584
nodeName: <replace with node to run>
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
@@ -38,7 +38,7 @@ spec:
volumes:
- name: device-plugin
hostPath:
path: /var/lib/rancher/k3s/agent/kubelet/device-plugins
path: /var/lib/kubelet/device-plugins
- name: dev-dir
hostPath:
path: /dev

View File

@@ -22,3 +22,11 @@ data:
nummaxdevices: 20
- devicematch: ^vcsm.*$
nummaxdevices: 20
- devicematch: ^ttyUSB[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyACM[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyTHS[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyS[0-9]*$
nummaxdevices: 1

View File

@@ -22,5 +22,13 @@ data:
nummaxdevices: 20
- devicematch: ^vcsm.*$
nummaxdevices: 20
- devicematch: ^ttyUSB[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyACM[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyTHS[0-9]*$
nummaxdevices: 1
- devicematch: ^ttyS[0-9]*$
nummaxdevices: 1
- devicematch: nvidia-gpu
nummaxdevices: 20

View File

@@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -32,11 +32,9 @@ spec:
hostname: smarter-device-management
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
imagePullSecrets:
- name: k8sedgeregcred
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -32,11 +32,9 @@ spec:
hostname: smarter-device-management
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
imagePullSecrets:
- name: k8sedgeregcred
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -32,11 +32,9 @@ spec:
hostname: smarter-device-management
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
imagePullSecrets:
- name: k8sedgeregcred
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@@ -32,11 +32,9 @@ spec:
hostname: smarter-device-management
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
imagePullSecrets:
- name: k8sedgeregcred
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager/smarter-device-manager:IMAGE_ID
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false