WIP: for adding nvidia-gpu as a device

This commit is contained in:
Alexandre Ferreira 2020-06-04 14:11:13 -05:00
parent f1b720f53e
commit 94783dfc37

52
main.go
View File

@ -18,6 +18,11 @@ import (
var confFileName string var confFileName string
// Kinds of device a DeviceInstance can represent. The constants are typed
// uint so that they can be assigned to, and switched against, the
// DeviceInstance.deviceType field, which is declared as uint; Go does not
// implicitly convert a typed int constant to uint.
const (
	// deviceFileType marks an instance created from a pattern match on an
	// entry under /dev.
	deviceFileType uint = 0
	// nvidiaSysType marks an instance created for the "nvidia-gpu" device
	// match.
	nvidiaSysType uint = 1
)
type DeviceInstance struct { type DeviceInstance struct {
devicePlugin *SmarterDevicePlugin devicePlugin *SmarterDevicePlugin
@ -25,6 +30,7 @@ type DeviceInstance struct {
socketName string socketName string
deviceFile string deviceFile string
numDevices uint numDevices uint
deviceType uint
} }
type DesiredDevice struct { type DesiredDevice struct {
@ -46,8 +52,8 @@ func init() {
flag.Parse() flag.Parse()
} }
func readDevDirectory() (files []string, err error) { func readDevDirectory(dirToList string) (files []string, err error) {
f, err := os.Open("/dev") f, err := os.Open(dirToList)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -93,15 +99,44 @@ func main() {
} }
glog.V(0).Info("Reading existing devices on /dev") glog.V(0).Info("Reading existing devices on /dev")
ExistingDevices, err := readDevDirectory() ExistingDevices, err := readDevDirectory("/dev")
if err != nil { if err != nil {
glog.Errorf(err.Error()) glog.Errorf(err.Error())
os.Exit(1) os.Exit(1)
} }
ExistingDevicesSys, err := readDevDirectory("/sys/devices")
if err != nil {
glog.Errorf(err.Error())
os.Exit(1)
}
var listDevicesAvailable []DeviceInstance var listDevicesAvailable []DeviceInstance
for _, deviceToTest := range desiredDevices { for _, deviceToTest := range desiredDevices {
if deviceToTest.DeviceMatch = "nvidia-gpu" {
glog.V(0).Infof("Checking nvidia devices")
foundDevices,err := findDevicesPattern(ExistingDevices, "gpu.[0-9]*")
if err != nil {
glog.Errorf(err.Error())
os.Exit(1)
}
// If found some create the devices entry
if len(foundDevices) > 0 {
for _, deviceToCreate := range foundDevices {
var newDevice DeviceInstance
deviceId := TrimPrefix(deviceToCreate,"gpu.")
newDevice.deviceName = "smarter-devices/" + "nvidia-gpu" + deviceId
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + d"nvidia-gpu" + deviceId + ".sock"
newDevice.deviceFile = deviceId
newDevice.numDevices = deviceToTest.NumMaxDevices
newDevice.deviceType = nvidiaSysType
listDevicesAvailable = append(listDevicesAvailable, newDevice)
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
}
}
}
else {
glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch) glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch)
foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch) foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch)
if err != nil { if err != nil {
@ -113,6 +148,7 @@ func main() {
if len(foundDevices) > 0 { if len(foundDevices) > 0 {
for _, deviceToCreate := range foundDevices { for _, deviceToCreate := range foundDevices {
var newDevice DeviceInstance var newDevice DeviceInstance
newDevice.deviceType = deviceFileType
newDevice.deviceName = "smarter-devices/" + deviceToCreate newDevice.deviceName = "smarter-devices/" + deviceToCreate
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock" newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock"
newDevice.deviceFile = "/dev/" + deviceToCreate newDevice.deviceFile = "/dev/" + deviceToCreate
@ -122,6 +158,7 @@ func main() {
} }
} }
} }
}
glog.V(0).Info("Starting FS watcher.") glog.V(0).Info("Starting FS watcher.")
watcher, err := newFSWatcher(pluginapi.DevicePluginPath) watcher, err := newFSWatcher(pluginapi.DevicePluginPath)
@ -147,11 +184,20 @@ L:
var err error var err error
for _, devicesInUse := range listDevicesAvailable { for _, devicesInUse := range listDevicesAvailable {
switch devicesInUse.deviceType {
case deviceFileType :
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName) devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
if err = devicesInUse.devicePlugin.Serve(); err != nil { if err = devicesInUse.devicePlugin.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?") glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break break
} }
case nvidiaSysType :
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
if err = devicesInUse.devicePlugin.Serve(); err != nil {
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
break
}
}
} }
if err != nil { if err != nil {
continue continue