mirror of
https://gitlab.com/arm-research/smarter/smarter-device-manager.git
synced 2024-11-24 19:44:06 +00:00
WIP: for adding nvidia-gpu as a device
This commit is contained in:
parent
f1b720f53e
commit
94783dfc37
98
main.go
98
main.go
@ -18,6 +18,11 @@ import (
|
|||||||
|
|
||||||
var confFileName string
|
var confFileName string
|
||||||
|
|
||||||
|
// Device kinds stored in DeviceInstance.deviceType.
//
// The constants are deliberately untyped: deviceType is declared as uint, and
// a constant typed as int can neither be assigned to it nor used as a case in
// a switch over it. An untyped constant converts implicitly to uint while
// still defaulting to int where no other type is required.
const (
	// deviceFileType tags a device that was matched against the entries
	// listed from /dev and is exposed through its /dev/<name> path.
	deviceFileType = 0

	// nvidiaSysType tags a device created for the "nvidia-gpu" match.
	nvidiaSysType = 1
)
|
||||||
|
|
||||||
type DeviceInstance struct {
|
type DeviceInstance struct {
|
||||||
devicePlugin *SmarterDevicePlugin
|
devicePlugin *SmarterDevicePlugin
|
||||||
|
|
||||||
@ -25,6 +30,7 @@ type DeviceInstance struct {
|
|||||||
socketName string
|
socketName string
|
||||||
deviceFile string
|
deviceFile string
|
||||||
numDevices uint
|
numDevices uint
|
||||||
|
deviceType uint
|
||||||
}
|
}
|
||||||
|
|
||||||
type DesiredDevice struct {
|
type DesiredDevice struct {
|
||||||
@ -46,8 +52,8 @@ func init() {
|
|||||||
flag.Parse()
|
flag.Parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
func readDevDirectory() (files []string, err error) {
|
func readDevDirectory(dirToList string) (files []string, err error) {
|
||||||
f, err := os.Open("/dev")
|
f, err := os.Open(dirToList)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -93,34 +99,65 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
glog.V(0).Info("Reading existing devices on /dev")
|
glog.V(0).Info("Reading existing devices on /dev")
|
||||||
ExistingDevices, err := readDevDirectory()
|
ExistingDevices, err := readDevDirectory("/dev")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf(err.Error())
|
glog.Errorf(err.Error())
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ExistingDevicesSys, err := readDevDirectory("/sys/devices")
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf(err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
var listDevicesAvailable []DeviceInstance
|
var listDevicesAvailable []DeviceInstance
|
||||||
|
|
||||||
for _, deviceToTest := range desiredDevices {
|
for _, deviceToTest := range desiredDevices {
|
||||||
glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch)
|
if deviceToTest.DeviceMatch = "nvidia-gpu" {
|
||||||
foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch)
|
glog.V(0).Infof("Checking nvidia devices")
|
||||||
if err != nil {
|
foundDevices,err := findDevicesPattern(ExistingDevices, "gpu.[0-9]*")
|
||||||
glog.Errorf(err.Error())
|
if err != nil {
|
||||||
os.Exit(1)
|
glog.Errorf(err.Error())
|
||||||
}
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
// If found some create the devices entry
|
// If found some create the devices entry
|
||||||
if len(foundDevices) > 0 {
|
if len(foundDevices) > 0 {
|
||||||
for _, deviceToCreate := range foundDevices {
|
for _, deviceToCreate := range foundDevices {
|
||||||
var newDevice DeviceInstance
|
var newDevice DeviceInstance
|
||||||
newDevice.deviceName = "smarter-devices/" + deviceToCreate
|
deviceId := TrimPrefix(deviceToCreate,"gpu.")
|
||||||
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock"
|
newDevice.deviceName = "smarter-devices/" + "nvidia-gpu" + deviceId
|
||||||
newDevice.deviceFile = "/dev/" + deviceToCreate
|
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + d"nvidia-gpu" + deviceId + ".sock"
|
||||||
newDevice.numDevices = deviceToTest.NumMaxDevices
|
newDevice.deviceFile = deviceId
|
||||||
listDevicesAvailable = append(listDevicesAvailable, newDevice)
|
newDevice.numDevices = deviceToTest.NumMaxDevices
|
||||||
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
|
newDevice.deviceType = nvidiaSysType
|
||||||
}
|
listDevicesAvailable = append(listDevicesAvailable, newDevice)
|
||||||
}
|
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
glog.V(0).Infof("Checking devices %s on /dev",deviceToTest.DeviceMatch)
|
||||||
|
foundDevices,err := findDevicesPattern(ExistingDevices, deviceToTest.DeviceMatch)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf(err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If found some create the devices entry
|
||||||
|
if len(foundDevices) > 0 {
|
||||||
|
for _, deviceToCreate := range foundDevices {
|
||||||
|
var newDevice DeviceInstance
|
||||||
|
newDevice.deviceType = deviceFileType
|
||||||
|
newDevice.deviceName = "smarter-devices/" + deviceToCreate
|
||||||
|
newDevice.socketName = pluginapi.DevicePluginPath + "smarter-" + deviceToCreate + ".sock"
|
||||||
|
newDevice.deviceFile = "/dev/" + deviceToCreate
|
||||||
|
newDevice.numDevices = deviceToTest.NumMaxDevices
|
||||||
|
listDevicesAvailable = append(listDevicesAvailable, newDevice)
|
||||||
|
glog.V(0).Infof("Creating device %s socket and %s name for %s",newDevice.deviceName,newDevice.deviceFile,deviceToTest.DeviceMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(0).Info("Starting FS watcher.")
|
glog.V(0).Info("Starting FS watcher.")
|
||||||
@ -147,11 +184,20 @@ L:
|
|||||||
|
|
||||||
var err error
|
var err error
|
||||||
for _, devicesInUse := range listDevicesAvailable {
|
for _, devicesInUse := range listDevicesAvailable {
|
||||||
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
|
switch devicesInUse.deviceType {
|
||||||
if err = devicesInUse.devicePlugin.Serve(); err != nil {
|
case deviceFileType :
|
||||||
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
|
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
|
||||||
break
|
if err = devicesInUse.devicePlugin.Serve(); err != nil {
|
||||||
}
|
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
case nvidiaSysType :
|
||||||
|
devicesInUse.devicePlugin = NewSmarterDevicePlugin(devicesInUse.numDevices, devicesInUse.deviceFile, devicesInUse.deviceName, devicesInUse.socketName)
|
||||||
|
if err = devicesInUse.devicePlugin.Serve(); err != nil {
|
||||||
|
glog.V(0).Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
|
Loading…
Reference in New Issue
Block a user