WIP: add nvidia-gpu as a device type

This commit is contained in:
Alexandre Ferreira 2020-06-04 14:11:13 -05:00
parent f1b720f53e
commit 94783dfc37

98
main.go
View File

@ -18,6 +18,11 @@ import (
var confFileName string var confFileName string
// Device kinds stored in DeviceInstance.deviceType. The constants are typed
// uint to match that field, so they can be assigned to it and used as case
// values in a switch over it without a conversion (a typed int constant is
// not assignable or comparable to a uint value).
const (
	// deviceFileType marks a device that was matched against the entries
	// listed from /dev.
	deviceFileType uint = 0
	// nvidiaSysType marks a device created for the "nvidia-gpu" device match.
	nvidiaSysType uint = 1
)
type DeviceInstance struct { type DeviceInstance struct {
devicePlugin *SmarterDevicePlugin devicePlugin *SmarterDevicePlugin
@ -25,6 +30,7 @@ type DeviceInstance struct {
socketName string socketName string
deviceFile string deviceFile string
numDevices uint numDevices uint
deviceType uint
} }
type DesiredDevice struct { type DesiredDevice struct {
@ -46,8 +52,8 @@ func init() {
flag.Parse() flag.Parse()
} }
func readDevDirectory() (files []string, err error) { func readDevDirectory(dirToList string) (files []string, err error) {
f, err := os.Open("/dev") f, err := os.Open(dirToList)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -93,34 +99,65 @@ func main() {
} }
glog.V(0).Info("Reading existing devices on /dev") glog.V(0).Info("Reading existing devices on /dev")
ExistingDevices, err := readDevDirectory() ExistingDevices, err := readDevDirectory("/dev")
if err != nil { if err != nil {
glog.Errorf(err.Error()) glog.Errorf(err.Error())
os.Exit(1) os.Exit(1)
} }
ExistingDevicesSys, err := readDevDirectory("/sys/devices")
if err != nil {
glog.Errorf(err.Error())
os.Exit(1)
}
var listDevicesAvailable []DeviceInstance var listDevicesAvailable []DeviceInstance
for _, deviceToTest := range desiredDevices { for _, deviceToTest := range desiredDevices {
glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch) if deviceToTest.DeviceMatch = "nvidia-gpu" {
foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch) glog.V(0).Infof("Checking nvidia devices")
if err != nil { foundDevices,err := findDevicesPattern(ExistingDevices, "gpu.[0-9]*")
glog.Errorf(err.Error()) if err != nil {
os.Exit(1) glog.Errorf(err.Error())
} os.Exit(1)
}
// If found some create the devices entry // If found some create the devices entry
if len(foundDevices) > 0 { if len(foundDevices) > 0 {
for _, deviceToCreate := range foundDevices { for _, deviceToCreate := range foundDevices {
var newDevice DeviceInstance var newDevice DeviceInstance
newDevice.deviceName = "smarter-devices/" + deviceToCreate deviceId := TrimPrefix(deviceToCreate,"gpu.")
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock" newDevice.deviceName = "smarter-devices/" + "nvidia-gpu" + deviceId
newDevice.deviceFile = "/dev/" + deviceToCreate newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + d"nvidia-gpu" + deviceId + ".sock"
newDevice.numDevices = deviceToTest.NumMaxDevices newDevice.deviceFile = deviceId
listDevicesAvailable = append(listDevicesAvailable, newDevice) newDevice.numDevices = deviceToTest.NumMaxDevices
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch) newDevice.deviceType = nvidiaSysType
} listDevicesAvailable = append(listDevicesAvailable, newDevice)
} glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
}
}
}
else {
glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch)
foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch)
if err != nil {
glog.Errorf(err.Error())
os.Exit(1)
}
// If found some create the devices entry
if len(foundDevices) > 0 {
for _, deviceToCreate := range foundDevices {
var newDevice DeviceInstance
newDevice.deviceType = deviceFileType
newDevice.deviceName = "smarter-devices/" + deviceToCreate
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock"
newDevice.deviceFile = "/dev/" + deviceToCreate
newDevice.numDevices = deviceToTest.NumMaxDevices
listDevicesAvailable = append(listDevicesAvailable, newDevice)
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
}
}
}
} }
glog.V(0).Info("Starting FS watcher.") glog.V(0).Info("Starting FS watcher.")
@ -147,11 +184,20 @@ L:
var err error var err error
for _, devicesInUse := range listDevicesAvailable { for _, devicesInUse := range listDevicesAvailable {
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName) switch devicesInUse.deviceType {
if err = devicesInUse.devicePlugin.Serve(); err != nil { case deviceFileType :
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?") devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
break if err = devicesInUse.devicePlugin.Serve(); err != nil {
} glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
case nvidiaSysType :
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
if err = devicesInUse.devicePlugin.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
}
} }
if err != nil { if err != nil {
continue continue