Skip to content
Snippets Groups Projects
Commit 7c0b8667 authored by Franck Dary
Browse files

Updated launchBatches to use request on host when best hosts are available

parent 89e8420f
No related branches found
No related tags found
No related merge requests found
......@@ -48,7 +48,7 @@ def launchTrainOar(mode, expName, arguments, nbHours) :
command += " -n train:%s"%expName
command += " -E %s.stderr"%expName
command += " -O %s.stdout"%expName
command += " -p \"gpu IS NOT NULL\""
command += " -p \"gpu IS NOT NULL%s\""%getBestHostConstraint()
command += " -l walltime=%d:00:00"%nbHours
command += " \'" + "./train.sh %s bin/%s %s --silent"%(mode,expName,arguments) + "\'"
......@@ -81,7 +81,7 @@ def launchEvalOar(mode, expName, nbHours) :
command += " -n eval:%s"%expName
command += " -E %s.stderr"%expName
command += " -O %s.stdout"%expName
command += " -p \"gpu IS NOT NULL\""
command += " -p \"gpu IS NOT NULL%s\""%getBestHostConstraint()
command += " -l walltime=%d:00:00"%nbHours
command += " \"" + "./evaluate.sh %s bin/%s --silent"%(mode,expName) + "\""
......@@ -95,8 +95,57 @@ def getOarNbLongJobs() :
###############################################################################
###############################################################################
def getOarNbGpuPerNode():
    """Count, per host, the OAR resources that expose a ``gpunum`` property.

    Runs ``oarnodes`` through a shell pipeline that keeps the lines matching
    ``gpunum=.``, extracts their ``host=`` value and counts the occurrences
    of each host with ``sort | uniq -c``.

    Returns:
        A dict mapping host name (str) to its count (int). Output lines that
        do not split into exactly two whitespace-separated fields are
        ignored, so empty command output yields an empty dict.
    """
    command = "oarnodes | grep gpunum=. | grep -o 'host=[^,]*' | cut -f2 -d= | sort | uniq -c"
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    # communicate() drains stdout, waits for the shell to exit and closes the
    # pipe, so no child process or file descriptor is left dangling.
    output, _ = process.communicate()
    res = {}
    for line in output.decode('utf8').split('\n'):
        fields = line.split()
        if len(fields) != 2:
            continue
        # `uniq -c` prints "<count> <host>".
        count, host = fields
        res[host] = int(count)
    return res
###############################################################################
###############################################################################
def getOarNbUsedGpuPerNode():
    """Count, per host, the OAR jobs whose properties mention a GPU request.

    Runs ``oarstat -f`` through a shell pipeline that keeps the
    ``assigned_hostnames =`` and ``propert...`` lines, filters on
    ``gpu is not null`` (case-insensitive, with one line of context), and
    counts the remaining lines with ``sort | uniq -c``. ``awk`` then prints
    the 4th field followed by the 1st, presumably "<host> <count>" (to be
    confirmed against real ``oarstat -f`` output).

    Returns:
        A dict mapping the first field of each output line (str) to the
        second field (int). Lines that do not split into exactly two
        whitespace-separated fields are ignored, so empty command output
        yields an empty dict.
    """
    # Raw string: the `\|` grep alternation is not a valid Python escape and
    # would trigger an invalid-escape-sequence warning in a normal literal.
    command = r"oarstat -f | grep 'assigned_hostnames =\|propert' | grep -i 'gpu is not null' -C 1 | sed '0~2d' | sort | uniq -c | awk '{print $4,$1}'"
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    # communicate() drains stdout, waits for the shell to exit and closes the
    # pipe, so no child process or file descriptor is left dangling.
    output, _ = process.communicate()
    res = {}
    for line in output.decode("utf8").split('\n'):
        fields = line.split()
        if len(fields) != 2:
            continue
        host, count = fields
        res[host] = int(count)
    return res
###############################################################################
###############################################################################
def getOarNbFreeGpuPerNode():
    """Return, per host, the total GPU count minus the GPUs currently in use.

    Starts from the dict returned by getOarNbGpuPerNode() and subtracts, in
    place, the matching entry of getOarNbUsedGpuPerNode(). Hosts with no
    entry in the used-GPU dict keep their total unchanged.
    """
    freeGpus = getOarNbGpuPerNode()
    usedGpus = getOarNbUsedGpuPerNode()
    for host in freeGpus:
        freeGpus[host] -= usedGpus.get(host, 0)
    return freeGpus
###############################################################################
###############################################################################
def getBestHostConstraint():
    """Build the extra OAR property clause that avoids the non-preferred hosts.

    If at least one of the preferred hosts has a free GPU according to
    getOarNbFreeGpuPerNode(), return a clause (starting with " and ") that
    excludes every host listed in `excludedHosts`. Otherwise return an empty
    string so that no extra constraint is added.

    A preferred host that is missing from the free-GPU dict is treated as
    having no free GPU instead of raising KeyError.

    Returns:
        str: the clause to append after "gpu IS NOT NULL", or "".
    """
    preferredHosts = ("diflives1", "lisnode2", "lisnode3")
    excludedHosts = ("lifnode1", "adnvideo1", "asfalda1", "see4c1", "sensei1")

    freeGpus = getOarNbFreeGpuPerNode()
    # .get(host, 0): a host absent from the oarnodes output has no free GPU.
    if any(freeGpus.get(host, 0) > 0 for host in preferredHosts):
        return "".join(" and host!='%s'" % host for host in excludedHosts)
    return ""
###############################################################################
###############################################################################
if __name__ == "__main__" :
if len(sys.argv) < 4 :
printUsageAndExit()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment