Skip to content
Snippets Groups Projects
Select Git revision
  • e0a9330ff9693dd029a471c7a2d1df09a247726f
  • master default
  • object
  • develop protected
  • private_algos
  • cuisine
  • SMOTE
  • revert-76c4cca5
  • archive protected
  • no_graphviz
  • 0.0.1
11 results

CQBoostUtils.py

Blame
  • oarstats.py 3.63 KiB
    #!/usr/bin/env python
    # coding: utf-8
    
    import argparse
    import logging
    import yaml
    import subprocess
    from collections import defaultdict
    import re
    import time
    import datetime
    
    
    class Owner:
        """Running totals for all jobs that belong to a single user.

        The per-queue attributes are ``defaultdict`` instances keyed by the
        job's ``queue`` value:

        * ``queues``    -- list of the jobs added for that queue
        * ``karma``     -- largest ``karma`` seen among those jobs
        * ``running``   -- number of jobs whose ``elapsed_time`` is non-zero
        * ``resources`` -- sum of the jobs' ``resources``
        * ``gpu``       -- sum of ``resources`` over jobs whose ``gpu`` is truthy

        ``timeleft`` accumulates ``wall_time - elapsed_time`` over every job and
        is interpreted as seconds when printed.
        """

        def __init__(self, name):
            """Create an empty summary for the user called *name*."""
            self.name = name
            self.timeleft = 0
            # Per-queue accumulators; missing queues start at an empty/zero value.
            self.queues = defaultdict(list)
            self.karma = defaultdict(float)
            self.running = defaultdict(int)
            self.resources = defaultdict(int)
            self.gpu = defaultdict(int)

        def add_job(self, job):
            """Fold *job* into this owner's per-queue and global totals."""
            queue_name = job.queue
            remaining = job.wall_time - job.elapsed_time
            is_started = job.elapsed_time != 0

            self.queues[queue_name].append(job)
            # Keep the highest karma reported for the queue.
            self.karma[queue_name] = max(self.karma[queue_name], job.karma)
            self.timeleft += remaining
            self.resources[queue_name] += job.resources
            if is_started:
                self.running[queue_name] += 1
            if job.gpu:
                self.gpu[queue_name] += job.resources

        def print_info(self):
            """Print one header line for the user, then one line per queue."""
            reserved = datetime.timedelta(seconds=self.timeleft)
            print("User {} :: Total Time Reserved: {}".format(self.name, reserved))
            for queue in self.queues:
                summary = "\t{} - Running {}, Resources: {}, GPUs: {}, Karma: {}".format(
                    queue,
                    self.running[queue],
                    self.resources[queue],
                    self.gpu[queue],
                    self.karma[queue],
                )
                print(summary)
    
    
    class Job:
        """Plain record holding the fields extracted for one scheduler job.

        All constructor arguments are stored unchanged on attributes of the
        same name; no validation or conversion is performed here.
        """

        def __init__(self, job_id, elapsed_time, wall_time, resources, devices, gpu, queue, karma):
            # Identity and scheduling metadata.
            self.job_id, self.queue, self.karma = job_id, queue, karma
            # Timing information.
            self.elapsed_time, self.wall_time = elapsed_time, wall_time
            # What the job was allocated.
            self.resources, self.devices, self.gpu = resources, devices, gpu
    
    
    def argparser(argv=None):
        """Parse the command line and configure the root logger accordingly.

        Args:
            argv: Optional list of argument strings to parse. When ``None``
                (the default) argparse reads ``sys.argv[1:]``, which is the
                original behaviour.

        Returns:
            The parsed ``argparse.Namespace``. Its ``logger`` attribute holds the
            selected level name in upper case.

        Raises:
            SystemExit: raised by argparse when the arguments are invalid
                (for example an unknown logging level) or ``--help`` is given.
        """
        parser = argparse.ArgumentParser()
        # type=str.upper normalises the value before it is checked against
        # `choices`, so lower-case level names such as "debug" are accepted too.
        parser.add_argument('-l', '--logger', default='INFO', type=str.upper,
                            choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                            help="Logging level: DEBUG, INFO (default), WARNING, ERROR, CRITICAL")
        args = parser.parse_args(argv)

        # `choices` guarantees that args.logger names an existing logging level,
        # so the attribute lookup cannot fail.
        logging.basicConfig(level=getattr(logging, args.logger))

        return args
    
    
    def main():
        """Print a per-user summary of the jobs reported by ``oarstat``.

        Runs ``oarstat --yaml``, extracts walltime (``W=``), resource count
        (``R=``), queue (``Q=``) and karma (``Karma=``) from each job's
        ``message`` field, groups the jobs by their ``owner`` field and prints
        every owner's summary followed by a blank line.

        Jobs whose message lacks a walltime, resource count or queue are skipped
        with a warning; a missing karma is treated as ``0.0``.

        Raises:
            subprocess.CalledProcessError: if ``oarstat`` exits with a non-zero
                status.
            OSError: if ``oarstat`` cannot be executed.
        """
        # Called for its side effect: it configures the logging level.
        argparser()

        stats_output = subprocess.check_output(["oarstat", "--yaml"]).decode('utf-8')
        # safe_load only builds plain Python data and, unlike a bare yaml.load(),
        # works without an explicit Loader. An empty document yields None, hence
        # the fallback to an empty mapping.
        stats_yaml = yaml.safe_load(stats_output) or {}

        owners = {}
        resources_pattern = re.compile(r'R=([0-9]+)')
        walltime_pattern = re.compile(r'W=([0-9]+:[0-9]+:[0-9]+)')
        queue_pattern = re.compile(r'Q=(\S+)')
        karma_pattern = re.compile(r'Karma=([0-9]+\.[0-9]+)')
        gpu_pattern = re.compile(r'gpu is not null', flags=re.IGNORECASE)

        # One timestamp for the whole report so all elapsed times share the
        # same reference instant.
        now = time.time()

        for job_id, job_info in stats_yaml.items():
            message = job_info["message"]
            walltime_match = walltime_pattern.search(message)
            resources_match = resources_pattern.search(message)
            queue_match = queue_pattern.search(message)
            if not (walltime_match and resources_match and queue_match):
                # A single oddly formatted job should not abort the report.
                logging.warning("Skipping job %s: cannot parse message %r", job_id, message)
                continue

            # A startTime of 0 marks a job that has not started yet.
            elapsed_time = 0 if job_info["startTime"] == 0 else now - job_info["startTime"]
            hours, minutes, seconds = walltime_match.group(1).split(':')
            wall_time = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
            resources = int(resources_match.group(1))
            queue = queue_match.group(1)

            # Karma is optional in the message; default to 0.0 when absent.
            karma_match = karma_pattern.search(message)
            karma = float(karma_match.group(1)) if karma_match else 0.0

            devices = job_info["assigned_network_address"]
            gpu = gpu_pattern.search(job_info["properties"]) is not None

            owner_name = job_info["owner"]
            if owner_name not in owners:
                owners[owner_name] = Owner(owner_name)
            job = Job(job_id, elapsed_time, wall_time, resources, devices, gpu, queue, karma)
            owners[owner_name].add_job(job)

        for owner in owners.values():
            owner.print_info()
            print()
    
    
    # Run the report only when this file is executed as a script, not when it
    # is imported as a module.
    if __name__ == '__main__':
        main()