From 879085aaed63360096ecf572c16895fe94caed45 Mon Sep 17 00:00:00 2001 From: Alexandre Ferreira Date: Wed, 24 Jun 2020 15:16:01 -0500 Subject: [PATCH 1/2] Fix for k3s >= 1.18 --- smarter-device-management-pod.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/smarter-device-management-pod.yaml b/smarter-device-management-pod.yaml index 25bac9d..bf5d3be 100644 --- a/smarter-device-management-pod.yaml +++ b/smarter-device-management-pod.yaml @@ -33,6 +33,8 @@ spec: mountPath: /var/lib/kubelet/device-plugins - name: dev-dir mountPath: /dev + - name: sys-dir + mountPath: /sys volumes: - name: device-plugin hostPath: @@ -40,4 +42,7 @@ spec: - name: dev-dir hostPath: path: /dev - terminationGracePeriodSeconds: 30 + - name: sys-dir + hostPath: + path: /sys + terminationGracePeriodSeconds: 30 From 304807e48e72b33893f9e589fe633b1eed1ed2bd Mon Sep 17 00:00:00 2001 From: Alexandre Ferreira Date: Tue, 5 Jan 2021 18:43:56 -0600 Subject: [PATCH 2/2] Fix removal of sockets files on shutdown --- compile.sh | 22 +++++++++++++++++-- main.go | 17 +++++++++----- server.go | 3 +++ ...device-management-pod-k3s-test-xavier.yaml | 2 +- smarter-device-management-pod-k3s.yaml | 2 +- smarter-device-management-pod.yaml | 2 +- smarter-device-manager-ds-k3s.yaml | 2 +- ...ice-manager-ds-with-configmap-rpi-k3s.yaml | 2 +- ...-device-manager-ds-with-configmap-rpi.yaml | 2 +- ...-manager-ds-with-configmap-xavier-k3s.yaml | 2 +- ...vice-manager-ds-with-configmap-xavier.yaml | 2 +- smarter-device-manager-ds.yaml | 2 +- 12 files changed, 43 insertions(+), 17 deletions(-) diff --git a/compile.sh b/compile.sh index 8f6e123..f058b08 100755 --- a/compile.sh +++ b/compile.sh @@ -5,6 +5,12 @@ function printHelp() { echo $(basename $0)" options:"; echo " -A # Compiling to ${ARCHS} now, examples: linux/amd64,linux/arm/v7,linux/arm/v6,linux/arm64" + if [ ${FLAG_NOCACHE} -gt 0 ] + then + echo " -C # Do not use cache" + else + echo " -C # Use cache" + fi if [ ${FLAG_UPLOADIMAGES} -gt 
0 ] then echo " -U # Do not upload images - the default is upload the images to the registry" @@ -43,8 +49,9 @@ FLAG_UPLOADMANIFEST=1 ADDITIONAL_TAG="" ADDITIONAL_IMAGE_NAME="" PUSH_OPTION="" +FLAG_NOCACHE=0 -while getopts hA:B:MST:U name +while getopts hA:B:MST:UC name do case $name in h) @@ -52,6 +59,10 @@ do exit 0;; A) ARCHS="$OPTARG";; + C) + [ ${FLAG_NOCACHE} -gt 0 ] && FLAG_NOCACHE=0; + [ ${FLAG_NOCACHE} -eq 0 ] && FLAG_NOCACHE=1; + ;; U) [ ${FLAG_UPLOADIMAGES} -gt 0 ] && FLAG_UPLOADIMAGES=0; [ ${FLAG_UPLOADIMAGES} -eq 0 ] && FLAG_UPLOADIMAGES=1; @@ -93,6 +104,13 @@ EOF fi fi +if [ $FLAG_NOCACHE -gt 0 ] +then + CACHE_OPTION="--no-cache" +else + CACHE_OPTION="" +fi + if [ $FLAG_UPLOADIMAGES -gt 0 ] then PUSH_OPTION="--push" @@ -100,6 +118,6 @@ else PUSH_OPTION="--load" fi -docker buildx build -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} . +docker buildx build ${CACHE_OPTION} -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} . exit 0 diff --git a/main.go b/main.go index e03ff2f..3826ad2 100644 --- a/main.go +++ b/main.go @@ -193,17 +193,17 @@ L: } var err error - for _, devicesInUse := range listDevicesAvailable { - switch devicesInUse.deviceType { + for id, _ := range listDevicesAvailable { + switch listDevicesAvailable[id].deviceType { case deviceFileType : - devicesInUse.devicePluginSmarter = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName) - if err = devicesInUse.devicePluginSmarter.Serve(); err != nil { + listDevicesAvailable[id].devicePluginSmarter = NewSmarterDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceFile, listDevicesAvailable[id].deviceName, listDevicesAvailable[id].socketName) + if err = listDevicesAvailable[id].devicePluginSmarter.Serve(); err != nil { glog.V(0).Info("Could not contact Kubelet, retrying. 
Did you enable the device plugin feature gate?") break } case nvidiaSysType : - devicesInUse.devicePluginNvidia = NewNvidiaDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceName,"NVIDIA_VISIBLE_DEVICES", devicesInUse.socketName, devicesInUse.deviceId) - if err = devicesInUse.devicePluginNvidia.Serve(); err != nil { + listDevicesAvailable[id].devicePluginNvidia = NewNvidiaDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceName,"NVIDIA_VISIBLE_DEVICES", listDevicesAvailable[id].socketName, listDevicesAvailable[id].deviceId) + if err = listDevicesAvailable[id].devicePluginNvidia.Serve(); err != nil { glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?") break } @@ -234,13 +234,18 @@ L: default: glog.V(0).Infof("Received signal \"%v\", shutting down.", s) for _, devicesInUse := range listDevicesAvailable { + glog.V(0).Info("Stopping device ", devicesInUse.deviceName) switch devicesInUse.deviceType { case deviceFileType : + glog.V(0).Info("Smarter device type") if devicesInUse.devicePluginSmarter != nil { + glog.V(0).Info("Stopping device") devicesInUse.devicePluginSmarter.Stop() } case nvidiaSysType : + glog.V(0).Info("Nvidia device type") if devicesInUse.devicePluginNvidia != nil { + glog.V(0).Info("Stopping device") devicesInUse.devicePluginNvidia.Stop() } } diff --git a/server.go b/server.go index b0a75b3..f637e0b 100644 --- a/server.go +++ b/server.go @@ -94,6 +94,7 @@ func (m *SmarterDevicePlugin) Start() error { // Stop the gRPC server func (m *SmarterDevicePlugin) Stop() error { + glog.V(0).Infof("Stopping server with socket ",m.socket) if m.server == nil { return nil } @@ -101,6 +102,7 @@ func (m *SmarterDevicePlugin) Stop() error { m.server.Stop() m.server = nil close(m.stop) + glog.V(0).Info("Server stopped with socket ",m.socket) return m.cleanup() } @@ -179,6 +181,7 @@ func (m *SmarterDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreS } func (m 
*SmarterDevicePlugin) cleanup() error { + glog.V(0).Info("Removing file ",m.socket) if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) { return err } diff --git a/smarter-device-management-pod-k3s-test-xavier.yaml b/smarter-device-management-pod-k3s-test-xavier.yaml index 41efb53..626ada2 100644 --- a/smarter-device-management-pod-k3s-test-xavier.yaml +++ b/smarter-device-management-pod-k3s-test-xavier.yaml @@ -15,7 +15,7 @@ spec: nodeName: smarter-jetson-xavier-4bcc2584 containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-management-pod-k3s.yaml b/smarter-device-management-pod-k3s.yaml index 5ff0e70..6b1edc7 100644 --- a/smarter-device-management-pod-k3s.yaml +++ b/smarter-device-management-pod-k3s.yaml @@ -15,7 +15,7 @@ spec: nodeName: containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-management-pod.yaml b/smarter-device-management-pod.yaml index bf5d3be..985aab3 100644 --- a/smarter-device-management-pod.yaml +++ b/smarter-device-management-pod.yaml @@ -15,7 +15,7 @@ spec: nodeName: containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds-k3s.yaml b/smarter-device-manager-ds-k3s.yaml index 2c76d5e..18b49ac 100644 --- a/smarter-device-manager-ds-k3s.yaml +++ 
b/smarter-device-manager-ds-k3s.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds-with-configmap-rpi-k3s.yaml b/smarter-device-manager-ds-with-configmap-rpi-k3s.yaml index 3283b46..350254a 100644 --- a/smarter-device-manager-ds-with-configmap-rpi-k3s.yaml +++ b/smarter-device-manager-ds-with-configmap-rpi-k3s.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds-with-configmap-rpi.yaml b/smarter-device-manager-ds-with-configmap-rpi.yaml index b8b8a95..d7c65f2 100644 --- a/smarter-device-manager-ds-with-configmap-rpi.yaml +++ b/smarter-device-manager-ds-with-configmap-rpi.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds-with-configmap-xavier-k3s.yaml b/smarter-device-manager-ds-with-configmap-xavier-k3s.yaml index ad7c3ab..5f0fd4a 100644 --- a/smarter-device-manager-ds-with-configmap-xavier-k3s.yaml +++ b/smarter-device-manager-ds-with-configmap-xavier-k3s.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: 
registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds-with-configmap-xavier.yaml b/smarter-device-manager-ds-with-configmap-xavier.yaml index ac47530..820926e 100644 --- a/smarter-device-manager-ds-with-configmap-xavier.yaml +++ b/smarter-device-manager-ds-with-configmap-xavier.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false diff --git a/smarter-device-manager-ds.yaml b/smarter-device-manager-ds.yaml index a576508..cdd0827 100644 --- a/smarter-device-manager-ds.yaml +++ b/smarter-device-manager-ds.yaml @@ -34,7 +34,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1 + image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2 imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false