Select Git revision
-
Baptiste Bauvin authored
oarstats.py 3.63 KiB
#!/usr/bin/env python
# coding: utf-8
import argparse
import logging
import yaml
import subprocess
from collections import defaultdict
import re
import time
import datetime
class Owner:
    """Per-user accumulator of job statistics, broken down by queue.

    Jobs are fed in one at a time through :meth:`add_job`; the totals can
    then be printed with :meth:`print_info`.
    """

    def __init__(self, name):
        """Create an empty accumulator for the user called *name*."""
        self.name = name
        # queue name -> list of the jobs added for that queue
        self.queues = defaultdict(list)
        # queue name -> highest karma among that queue's jobs (never below 0.0,
        # because the default value of a missing entry is 0.0)
        self.karma = defaultdict(float)
        # sum over all jobs of (wall_time - elapsed_time); print_info()
        # interprets this value as a number of seconds
        self.timeleft = 0
        # queue name -> number of jobs whose elapsed time is non-zero
        self.running = defaultdict(int)
        # queue name -> sum of job.resources over all jobs of that queue
        self.resources = defaultdict(int)
        # queue name -> sum of job.resources over the jobs flagged with job.gpu
        self.gpu = defaultdict(int)

    def add_job(self, job):
        """Fold one job into this owner's totals.

        Args:
            job: Object exposing ``queue``, ``karma``, ``wall_time``,
                ``elapsed_time``, ``resources`` and ``gpu`` attributes.
        """
        queue = job.queue
        self.queues[queue].append(job)
        self.karma[queue] = max(self.karma[queue], job.karma)
        self.timeleft += job.wall_time - job.elapsed_time
        self.resources[queue] += job.resources
        # A job counts as running as soon as it has a non-zero elapsed time.
        if job.elapsed_time != 0:
            self.running[queue] += 1
        # NOTE(review): the GPU tally is assumed to cover every job of the
        # queue, not only the running ones -- confirm against the upstream file.
        if job.gpu:
            self.gpu[queue] += job.resources

    def print_info(self):
        """Print the total reserved time, then one summary line per queue."""
        print("User {} :: Total Time Reserved: {}".format(self.name, datetime.timedelta(seconds=self.timeleft)))
        for queue in self.queues:
            print("\t{} - Running {}, Resources: {}, GPUs: {}, Karma: {}".format(queue, self.running[queue], self.resources[queue], self.gpu[queue], self.karma[queue]))
class Job:
    """Plain record describing a single job.

    The constructor stores its arguments unchanged; no validation or
    conversion is performed.
    """

    def __init__(self, job_id, elapsed_time, wall_time, resources, devices, gpu, queue, karma):
        """Store the job description.

        Args:
            job_id: Identifier of the job.
            elapsed_time: Time the job has already run; 0 means it has not
                started.
            wall_time: Time reserved for the job, in the same unit as
                *elapsed_time*.
            resources: Amount of resources attached to the job.
            devices: Value describing where the job is assigned.
            gpu: Truthy when the job is a GPU job.
            queue: Name of the queue the job belongs to.
            karma: Karma value attached to the job.
        """
        self.job_id = job_id
        self.elapsed_time = elapsed_time
        self.wall_time = wall_time
        self.resources = resources
        self.devices = devices
        self.gpu = gpu
        self.queue = queue
        self.karma = karma

    def __repr__(self):
        """Return a debugging representation listing every stored field."""
        return (
            "Job(job_id={!r}, elapsed_time={!r}, wall_time={!r}, resources={!r}, "
            "devices={!r}, gpu={!r}, queue={!r}, karma={!r})"
        ).format(self.job_id, self.elapsed_time, self.wall_time, self.resources,
                 self.devices, self.gpu, self.queue, self.karma)
def argparser(argv=None):
    """Parse the command line and configure the root logger accordingly.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options; its ``logger``
        attribute is the selected level name.

    Raises:
        ValueError: If the selected level name is not an integer attribute of
            the ``logging`` module. With the current ``choices`` this is a
            defensive check only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--logger', default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        help="Logging level: DEBUG, INFO (default), WARNING, ERROR, CRITICAL")
    args = parser.parse_args(argv)

    # Translate the level name into the numeric constant logging expects.
    numeric_level = getattr(logging, args.logger.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError("Invalid log level: {}".format(args.logger))
    logging.basicConfig(level=numeric_level)
    return args
def main():
    """Print a per-user summary of the jobs reported by ``oarstat --yaml``.

    Each job's ``message`` field is scanned for the ``W=`` (wall time,
    ``H:M:S``), ``R=`` (resources), ``Q=`` (queue) and optional ``Karma=``
    entries. Jobs are grouped by their ``owner`` field and one report is
    printed per owner, followed by a blank line.

    Raises:
        subprocess.CalledProcessError: If ``oarstat`` exits with a non-zero
            status.
    """
    # Called for its side effect only: it configures the logging level.
    argparser()

    stats_output = subprocess.check_output(["oarstat", "--yaml"]).decode('utf-8')
    # safe_load only builds plain Python objects and, unlike a bare
    # yaml.load(), does not need an explicit Loader argument. An empty
    # document loads as None, hence the fallback to an empty mapping.
    stats_yaml = yaml.safe_load(stats_output) or {}

    resources_pattern = re.compile(r'R=([0-9]+)')
    walltime_pattern = re.compile(r'W=([0-9]+:[0-9]+:[0-9]+)')
    queue_pattern = re.compile(r'Q=(\S+)')
    karma_pattern = re.compile(r'Karma=([0-9]+\.[0-9]+)')
    gpu_pattern = re.compile(r'gpu is not null', flags=re.IGNORECASE)

    owners = {}
    for job_id, job_info in stats_yaml.items():
        message = job_info["message"] or ""
        walltime_match = walltime_pattern.search(message)
        resources_match = resources_pattern.search(message)
        queue_match = queue_pattern.search(message)
        if walltime_match is None or resources_match is None or queue_match is None:
            # Without these entries the job cannot be accounted for; skip it
            # rather than aborting the whole report.
            logging.warning("Skipping job %s: cannot parse message %r", job_id, message)
            continue

        hours, minutes, seconds = walltime_match.group(1).split(':')
        wall_time = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
        resources = int(resources_match.group(1))
        queue = queue_match.group(1)

        # Karma is optional in the message; default to 0.0 when absent.
        karma_match = karma_pattern.search(message)
        karma = float(karma_match.group(1)) if karma_match is not None else 0.0

        # A startTime of 0 is treated as "not started yet".
        start_time = job_info["startTime"]
        elapsed_time = 0 if start_time == 0 else time.time() - start_time

        devices = job_info["assigned_network_address"]
        gpu = gpu_pattern.search(job_info["properties"]) is not None

        owner_name = job_info["owner"]
        if owner_name not in owners:
            owners[owner_name] = Owner(owner_name)
        job = Job(job_id, elapsed_time, wall_time, resources, devices, gpu, queue, karma)
        owners[owner_name].add_job(job)

    for owner in owners.values():
        owner.print_info()
        print()
# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()