Fix removal of sockets files on shhutdown

This commit is contained in:
Alexandre Ferreira 2021-01-05 18:43:56 -06:00
parent 879085aaed
commit 304807e48e
12 changed files with 43 additions and 17 deletions

View File

@ -5,6 +5,12 @@
function printHelp() {
echo $(basename $0)" options:";
echo " -A <Architectures to use> # Compiling to ${ARCHS} now, examples: linux/amd64,linux/arm/v7,linux/arm/v6,linux/arm64"
if [ ${FLAG_NOCACHE} -gt 0 ]
then
echo " -C # Do not use cache"
else
echo " -C # Use cache"
fi
if [ ${FLAG_UPLOADIMAGES} -gt 0 ]
then
echo " -U # Do not upload images - the default is upload the images to the registry"
@ -43,8 +49,9 @@ FLAG_UPLOADMANIFEST=1
ADDITIONAL_TAG=""
ADDITIONAL_IMAGE_NAME=""
PUSH_OPTION=""
FLAG_NOCACHE=0
while getopts hA:B:MST:U name
while getopts hA:B:MST:UC name
do
case $name in
h)
@ -52,6 +59,10 @@ do
exit 0;;
A)
ARCHS="$OPTARG";;
C)
[ ${FLAG_NOCACHE} -gt 0 ] && FLAG_NOCACHE=0;
[ ${FLAG_NOCACHE} -eq 0 ] && FLAG_NOCACHE=1;
;;
U)
[ ${FLAG_UPLOADIMAGES} -gt 0 ] && FLAG_UPLOADIMAGES=0;
[ ${FLAG_UPLOADIMAGES} -eq 0 ] && FLAG_UPLOADIMAGES=1;
@ -93,6 +104,13 @@ EOF
fi
fi
if [ $FLAG_NOCACHE -gt 0 ]
then
CACHE_OPTION="--no-cache"
else
CACHE_OPTION=""
fi
if [ $FLAG_UPLOADIMAGES -gt 0 ]
then
PUSH_OPTION="--push"
@ -100,6 +118,6 @@ else
PUSH_OPTION="--load"
fi
docker buildx build -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} .
docker buildx build ${CACHE_OPTION} -t "${REPOSITORY_NAME}${IMAGE_NAME}${ADDITIONAL_IMAGE_NAME}:${BUILD_TAG}" --platform=${ARCHS} ${PUSH_OPTION} .
exit 0

17
main.go
View File

@ -193,17 +193,17 @@ L:
}
var err error
for _, devicesInUse := range listDevicesAvailable {
switch devicesInUse.deviceType {
for id, _ := range listDevicesAvailable {
switch listDevicesAvailable[id].deviceType {
case deviceFileType :
devicesInUse.devicePluginSmarter = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
if err = devicesInUse.devicePluginSmarter.Serve(); err != nil {
listDevicesAvailable[id].devicePluginSmarter = NewSmarterDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceFile, listDevicesAvailable[id].deviceName, listDevicesAvailable[id].socketName)
if err = listDevicesAvailable[id].devicePluginSmarter.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
case nvidiaSysType :
devicesInUse.devicePluginNvidia = NewNvidiaDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceName,"NVIDIA_VISIBLE_DEVICES", devicesInUse.socketName, devicesInUse.deviceId)
if err = devicesInUse.devicePluginNvidia.Serve(); err != nil {
listDevicesAvailable[id].devicePluginNvidia = NewNvidiaDevicePlugin(listDevicesAvailable[id].numDevices, listDevicesAvailable[id].deviceName,"NVIDIA_VISIBLE_DEVICES", listDevicesAvailable[id].socketName, listDevicesAvailable[id].deviceId)
if err = listDevicesAvailable[id].devicePluginNvidia.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
@ -234,13 +234,18 @@ L:
default:
glog.V(0).Infof("Received signal \"%v\", shutting down.", s)
for _, devicesInUse := range listDevicesAvailable {
glog.V(0).Info("Stopping device ", devicesInUse.deviceName)
switch devicesInUse.deviceType {
case deviceFileType :
glog.V(0).Info("Smarter device type")
if devicesInUse.devicePluginSmarter != nil {
glog.V(0).Info("Stopping device")
devicesInUse.devicePluginSmarter.Stop()
}
case nvidiaSysType :
glog.V(0).Info("Nvidia device type")
if devicesInUse.devicePluginNvidia != nil {
glog.V(0).Info("Stopping device")
devicesInUse.devicePluginNvidia.Stop()
}
}

View File

@ -94,6 +94,7 @@ func (m *SmarterDevicePlugin) Start() error {
// Stop the gRPC server
func (m *SmarterDevicePlugin) Stop() error {
glog.V(0).Infof("Stopping server with socket ",m.socket)
if m.server == nil {
return nil
}
@ -101,6 +102,7 @@ func (m *SmarterDevicePlugin) Stop() error {
m.server.Stop()
m.server = nil
close(m.stop)
glog.V(0).Info("Server stopped with socket ",m.socket)
return m.cleanup()
}
@ -179,6 +181,7 @@ func (m *SmarterDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreS
}
func (m *SmarterDevicePlugin) cleanup() error {
glog.V(0).Info("Removing file ",m.socket)
if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
return err
}

View File

@ -15,7 +15,7 @@ spec:
nodeName: smarter-jetson-xavier-4bcc2584
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -15,7 +15,7 @@ spec:
nodeName: <replace with node to run>
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -15,7 +15,7 @@ spec:
nodeName: <replace with node to run>
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false

View File

@ -34,7 +34,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: smarter-device-manager
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.1
image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:v1.1.2
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false