#!/usr/bin/env python
# coding: utf-8
"""Print a per-user summary of the jobs reported by ``oarstat --yaml``.

For every job owner the script aggregates, per queue, the number of running
jobs, the requested resources, the resources of GPU jobs and the highest
karma value, plus the total remaining wall time across all of the owner's
jobs.
"""
import argparse
import datetime
import logging
import re
import subprocess
import time
from collections import defaultdict

logger = logging.getLogger(__name__)

# Fields extracted from a job's free-text "message" / "properties" strings.
# NOTE(review): the patterns assume a message such as
# "R=4,W=1:30:00,J=B,Q=default (Karma=0.250)" -- confirm against the oarstat
# version in use.
RESOURCES_PATTERN = re.compile(r'R=([0-9]+)')
WALLTIME_PATTERN = re.compile(r'W=([0-9]+:[0-9]+:[0-9]+)')
QUEUE_PATTERN = re.compile(r'Q=(\S+)')
KARMA_PATTERN = re.compile(r'Karma=([0-9]+\.[0-9]+)')
GPU_PATTERN = re.compile('gpu is not null', flags=re.IGNORECASE)


class Owner:
    """Aggregated job statistics for a single job owner."""

    def __init__(self, name):
        self.name = name
        # queue name -> list of Job objects submitted to that queue
        self.queues = defaultdict(list)
        # queue name -> highest karma seen among the owner's jobs
        self.karma = defaultdict(float)
        # sum over all jobs of (wall_time - elapsed_time), in seconds
        self.timeleft = 0
        # queue name -> number of jobs with a non-zero elapsed time
        self.running = defaultdict(int)
        # queue name -> total resources requested
        self.resources = defaultdict(int)
        # queue name -> total resources requested by GPU jobs
        self.gpu = defaultdict(int)

    def add_job(self, job):
        """Fold *job* into this owner's per-queue statistics."""
        queue = job.queue
        self.queues[queue].append(job)
        self.karma[queue] = max(self.karma[queue], job.karma)
        self.timeleft += job.wall_time - job.elapsed_time
        self.resources[queue] += job.resources
        # A job counts as running as soon as it has accumulated elapsed time.
        if job.elapsed_time != 0:
            self.running[queue] += 1
        if job.gpu:
            self.gpu[queue] += job.resources

    def print_info(self):
        """Print the owner's remaining time followed by one line per queue."""
        print("User {} :: Timeleft: {}".format(
            self.name, datetime.timedelta(seconds=self.timeleft)))
        for queue in self.queues:
            print("\t{} - Running {}, Resources: {}, GPUs: {}, Karma: {}".format(
                queue, self.running[queue], self.resources[queue],
                self.gpu[queue], self.karma[queue]))


class Job:
    """Plain record describing one job."""

    def __init__(self, job_id, elapsed_time, wall_time, resources, devices,
                 gpu, queue, karma):
        self.job_id = job_id
        self.elapsed_time = elapsed_time  # seconds; 0 if not started
        self.wall_time = wall_time        # requested wall time in seconds
        self.resources = resources
        self.devices = devices
        self.gpu = gpu                    # True if the job asks for a GPU
        self.queue = queue
        self.karma = karma


def argparser(argv=None):
    """Parse the command line and configure the root logger.

    Args:
        argv: Optional list of arguments; ``None`` (the default) makes
            argparse read ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace``.

    Raises:
        ValueError: If the requested log level is not a ``logging`` level.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--logger', default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        help="Logging level: DEBUG, INFO (default), WARNING, ERROR")
    args = parser.parse_args(argv)

    numeric_level = getattr(logging, args.logger.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError("Invalid log level: {}".format(args.logger))
    logging.basicConfig(level=numeric_level)
    return args


def _required_field(pattern, message, label):
    """Return group 1 of *pattern* in *message* or raise ``ValueError``."""
    match = pattern.search(message)
    if match is None:
        raise ValueError("no {} found in message {!r}".format(label, message))
    return match.group(1)


def parse_job(job_id, job_info, now=None):
    """Build a ``Job`` from one entry of the ``oarstat --yaml`` mapping.

    Args:
        job_id: Key of the entry in the oarstat mapping.
        job_info: Mapping with the keys ``startTime``, ``message``,
            ``assigned_network_address`` and ``properties``.
        now: Reference timestamp used to compute the elapsed time; defaults
            to ``time.time()``.

    Returns:
        The populated ``Job``.

    Raises:
        KeyError: If ``startTime`` or ``assigned_network_address`` is missing.
        ValueError: If the message lacks the wall time, resources or queue.
    """
    if now is None:
        now = time.time()

    # Both strings may be null in the YAML; treat that like an empty string
    # instead of letting re.search() fail with a TypeError.
    message = job_info.get("message") or ""
    properties = job_info.get("properties") or ""

    start_time = job_info["startTime"]
    # A start time of 0 marks a job that has not started yet.
    elapsed_time = 0 if start_time == 0 else now - start_time

    hours, minutes, seconds = _required_field(
        WALLTIME_PATTERN, message, "wall time").split(':')
    wall_time = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    resources = int(_required_field(RESOURCES_PATTERN, message, "resources"))
    queue = _required_field(QUEUE_PATTERN, message, "queue")

    # Karma is optional: jobs without it count as 0.0.
    karma_match = KARMA_PATTERN.search(message)
    karma = float(karma_match.group(1)) if karma_match else 0.0

    devices = job_info["assigned_network_address"]
    gpu = GPU_PATTERN.search(properties) is not None
    return Job(job_id, elapsed_time, wall_time, resources, devices, gpu,
               queue, karma)


def load_jobs():
    """Run ``oarstat --yaml`` and return its job mapping (possibly empty)."""
    # Imported here rather than at module level so that the data model and
    # the parsing helpers stay importable on machines without PyYAML.
    import yaml

    stats_output = subprocess.check_output(["oarstat", "--yaml"]).decode('utf-8')
    # safe_load: the output is plain data, and yaml.load() without an explicit
    # Loader is unsafe and rejected outright by PyYAML >= 6.
    # Empty output parses to None, hence the fallback to an empty mapping.
    return yaml.safe_load(stats_output) or {}


def main():
    """Collect the current jobs, group them by owner and print the summary."""
    argparser()  # called for its side effect: configures logging

    owners = {}
    for job_id, job_info in load_jobs().items():
        try:
            owner_name = job_info["owner"]
            job = parse_job(job_id, job_info)
        except (KeyError, ValueError) as err:
            # One malformed entry must not abort the whole report.
            logger.warning("Skipping job %s: %s", job_id, err)
            continue
        if owner_name not in owners:
            owners[owner_name] = Owner(owner_name)
        owners[owner_name].add_job(job)

    for owner in owners.values():
        owner.print_info()
        print()


if __name__ == '__main__':
    main()