from xml.etree import ElementTree as ET
import re

# Module-level validation state; validate_xml() rebinds all three names to
# fresh empty containers at the start of every run.
seen_section_ids = dict()  # section id string -> True, used to detect repeated ids
seen_actions = dict()      # keyword action string -> True, counted in the final report
warnings = list()          # non-fatal messages gathered while validating

class VerifyException(Exception):
    """Validation error, optionally tied to the XML element that caused it.

    Args:
        message: human-readable description of the problem.
        node: the offending ElementTree element, or None when no element is
            available. It is optional so that raising with only a message
            still produces a VerifyException rather than a TypeError.
    """

    def __init__(self, message, node=None):
        self.message = message
        self.node = node

    def __str__(self):
        """Return the message followed by a one-line excerpt of the node."""
        if self.node is None:
            return self.message
        snippet = ET.tostring(self.node)
        # ET.tostring() returns a byte string; on Python 3 that is not ``str``
        # and must be decoded before it can be mixed with text.
        if not isinstance(snippet, str):
            snippet = snippet.decode('ascii', 'replace')
        # Keep the excerpt short and on a single line.
        return self.message + ': ' + snippet[:200].replace('\n', ' ').strip()

    def __repr__(self):
        return str(self)

def is_int(text):
    """Tell whether *text* is a non-empty string made only of ASCII digits.

    Args:
        text: the string to check, or None (e.g. a missing XML attribute).

    Returns:
        True when text consists solely of one or more characters 0-9,
        False otherwise (including for None and the empty string).
    """
    if text is None:
        return False
    # '\Z' anchors at the very end of the string; '$' would also accept a
    # trailing newline. '[0-9]' keeps the check to ASCII digits regardless of
    # how the interpreter defines '\d' for text strings.
    return re.match(r'[0-9]+\Z', text) is not None

def has_blank(text):
    """Search *text* for any whitespace character.

    Returns False when text is None; otherwise the result of the regex
    search, i.e. a match object for the first whitespace character or None
    when there is none. Callers rely on the truthiness of the result.
    """
    if text is None:
        return False
    return re.search(r'\s', text)

def verify_keyword(node):
    """Validate a single <keyword> element.

    Checks, in order: only 'action' and 'lang' attributes are present,
    'action' exists, 'action' contains no whitespace, 'lang' is absent or one
    of 'eng'/'esp', and the element has no children. An empty action is only
    recorded in the module-level ``warnings`` list. The action value is
    recorded in ``seen_actions``.

    Raises:
        VerifyException: on the first rule that is violated.
    """
    global seen_actions, warnings
    permitted = ('action', 'lang')
    unexpected = [name for name in node.attrib if name not in permitted]
    if unexpected:
        raise VerifyException('attribute "%s" not allowed in <%s>' % (unexpected[0], node.tag), node)
    if 'action' not in node.attrib:
        raise VerifyException('node <%s> must contain attribute "%s"' % (node.tag, 'action'), node)

    action = node.get('action')
    if action.strip() == '':
        # Reported as a warning only; validation carries on.
        warnings.append('WARNING: empty action for %s' % ET.tostring(node).strip())
    if has_blank(action):
        raise VerifyException('spaces not allowed in action "%s"' % action, node)
    seen_actions[action] = True

    lang = node.get('lang')
    if lang not in (None, 'eng', 'esp'):
        raise VerifyException('unsupported lang "%s"' % lang, node)

    # Any child at all is an error; report the first one.
    first_child = next(iter(node), None)
    if first_child is not None:
        raise VerifyException('child <%s> not allowed in <%s>' % (first_child.tag, node.tag), node)

def verify_sequence(node):
    """Validate a <sequence> element and each of its <keyword> children.

    Allowed attributes are 'ordre', 'repetition', 'action' and 'lang'.
    'lang' must be absent or one of 'eng'/'esp', 'ordre' must be 'strict' or
    'variable' (a missing 'ordre' is rejected too), and 'action' must not
    contain whitespace. Only <keyword> children are accepted.

    Raises:
        VerifyException: on the first rule that is violated.
    """
    permitted = ('ordre', 'repetition', 'action', 'lang')
    unexpected = [name for name in node.attrib if name not in permitted]
    if unexpected:
        raise VerifyException('attribute "%s" not allowed in <%s>' % (unexpected[0], node.tag), node)

    lang = node.get('lang')
    if lang not in (None, 'eng', 'esp'):
        raise VerifyException('unsupported lang "%s"' % lang, node)

    ordre = node.get('ordre')
    if ordre not in ('strict', 'variable'):
        raise VerifyException('unsupported value "%s" for attribute "%s"' % (ordre, 'ordre'), node)

    action = node.get('action')
    if has_blank(action):
        raise VerifyException('spaces not allowed in action "%s"' % action, node)

    # Children are checked in document order; the first foreign tag aborts.
    for child in node:
        if child.tag != 'keyword':
            raise VerifyException('child <%s> not allowed in <%s>' % (child.tag, node.tag), node)
        verify_keyword(child)

def verify_section(node):
    """Validate a <section> element and each of its <sequence> children.

    Allowed attributes are 'id' (required) and 'action'. 'action' must not
    contain whitespace; 'id' must pass is_int() and must not have been seen
    before in this run (tracked in the module-level ``seen_section_ids``).
    Only <sequence> children are accepted, and no non-blank text may appear
    directly inside or directly after the element.

    Raises:
        VerifyException: on the first rule that is violated.
    """
    global seen_section_ids
    permitted = ('id', 'action')
    unexpected = [name for name in node.attrib if name not in permitted]
    if unexpected:
        raise VerifyException('attribute "%s" not allowed in <%s>' % (unexpected[0], node.tag), node)
    if 'id' not in node.attrib:
        raise VerifyException('node <%s> must contain attribute "%s"' % (node.tag, 'id'), node)

    action = node.get('action')
    if has_blank(action):
        raise VerifyException('spaces not allowed in action "%s"' % action, node)

    section_id = node.get('id')
    if not is_int(section_id):
        raise VerifyException('only integers allowed for section id "%s"' % section_id, node)
    if section_id in seen_section_ids:
        raise VerifyException('repeated section id "%s"' % section_id, node)
    seen_section_ids[section_id] = True

    # Children are checked in document order; the first foreign tag aborts.
    for child in node:
        if child.tag != 'sequence':
            raise VerifyException('child <%s> not allowed in <%s>' % (child.tag, node.tag), node)
        verify_sequence(child)

    # Whitespace-only text around the element is tolerated.
    if (node.text or '').strip():
        raise VerifyException('no text allowed directly in <%s>' % (node.tag), node)
    if (node.tail or '').strip():
        raise VerifyException('no text allowed directly after <%s>' % (node.tag), node)

def verify_liste_section(node):
    """Validate a <liste_section> element and each of its <section> children.

    Allowed attributes are 'sequences', 'ordre', 'repetition' and 'action';
    their values are not inspected here. Only <section> children are
    accepted, and no non-blank text may appear directly inside or directly
    after the element.

    Raises:
        VerifyException: on the first rule that is violated.
    """
    permitted = ('sequences', 'ordre', 'repetition', 'action')
    unexpected = [name for name in node.attrib if name not in permitted]
    if unexpected:
        raise VerifyException('attribute "%s" not allowed in <%s>' % (unexpected[0], node.tag), node)

    # Children are checked in document order; the first foreign tag aborts.
    for child in node:
        if child.tag != 'section':
            raise VerifyException('child <%s> not allowed in <%s>' % (child.tag, node.tag), node)
        verify_section(child)

    # Whitespace-only text around the element is tolerated.
    if (node.text or '').strip():
        raise VerifyException('no text allowed directly in <%s>' % (node.tag), node)
    if (node.tail or '').strip():
        raise VerifyException('no text allowed directly after <%s>' % (node.tag), node)

def verify_root(node):
    """Validate the document root and everything below it.

    The root must be <homeostasis>, may only carry a 'version' attribute,
    may only contain <liste_section> children, and must not have non-blank
    text directly inside or directly after it.

    Args:
        node: the root ElementTree element of the parsed document.

    Raises:
        VerifyException: on the first rule that is violated, here or in any
            nested element.
    """
    if node.tag != 'homeostasis':
        # The offending node is passed along like in every other check, so
        # the error is reported as a VerifyException with context.
        raise VerifyException('root tag should be <homeostasis>', node)
    for key in node.attrib:
        if key not in ['version']:
            raise VerifyException('attribute "%s" not allowed in <%s>' % (key, node.tag), node)
    # Children are checked in document order; the first foreign tag aborts.
    for child in node:
        if child.tag == 'liste_section':
            verify_liste_section(child)
        else:
            raise VerifyException('child <%s> not allowed in <%s>' % (child.tag, node.tag), node)
    # Whitespace-only text around the element is tolerated.
    if node.text is not None and node.text.strip() != '':
        raise VerifyException('no text allowed directly in <%s>' % (node.tag), node)
    if node.tail is not None and node.tail.strip() != '':
        raise VerifyException('no text allowed directly after <%s>' % (node.tag), node)

def _with_warnings(text):
    """Return *text* preceded by the collected warnings, one per line.

    When any warning was collected, a dashed separator line is appended to
    the module-level ``warnings`` list first so it appears before *text*.
    """
    if warnings:
        warnings.append('--------------')
    return '\n'.join(warnings) + '\n' + text


def validate_xml(filename):
    """Parse *filename* and validate it against the <homeostasis> rules.

    The module-level ``seen_section_ids``, ``seen_actions`` and ``warnings``
    are reset at the start of every call.

    Args:
        filename: source handed to ElementTree.parse().

    Returns:
        A ``(ok, report)`` tuple. ``ok`` is True when validation succeeded.
        ``report`` holds the collected warnings followed by either a success
        summary, the VerifyException text, or a formatted traceback for any
        other error (unreadable file, malformed XML, ...).
    """
    global seen_section_ids, seen_actions, warnings
    import traceback
    seen_section_ids = {}
    seen_actions = {}
    warnings = []

    try:
        root = ET.parse(filename).getroot()
    except Exception:
        # format_exc() takes an optional frame limit, not the exception; it
        # reads the exception currently being handled on its own.
        return (False, _with_warnings(traceback.format_exc()))

    try:
        verify_root(root)
    except VerifyException as e:
        return (False, _with_warnings(str(e)))
    except Exception:
        return (False, _with_warnings(traceback.format_exc()))

    summary = 'successfuly validated "%s"\nfound %d sections, %d types of action' % (
        filename, len(seen_section_ids), len(seen_actions))
    return (True, _with_warnings(summary))

if __name__ == '__main__':
    # Command-line entry point: validate the file named by the first argument
    # and print the resulting (ok, report) tuple.
    import sys
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s FILE.xml\n' % sys.argv[0])
        sys.exit(2)
    # The parenthesised form is valid on both Python 2 and Python 3 and
    # prints the same tuple representation on either.
    print(validate_xml(sys.argv[1]))