"""Structural validator for <homeostasis> XML documents.

The expected nesting is::

    <homeostasis version="...">
      <liste_section ...>
        <section id="N" ...>
          <sequence ordre="strict|variable" ...>
            <keyword action="..." lang="eng|esp"/>

Use validate_xml() as the entry point; it returns an (ok, report) tuple
instead of raising.
"""
from xml.etree import ElementTree as ET
import re
import traceback

# Per-run state. validate_xml() rebinds all three at the start of every run.
seen_section_ids = {}   # section id (string) -> True
seen_actions = {}       # keyword action (string) -> True
warnings = []           # non-fatal messages collected during verification

# Accepted values for the optional "lang" attribute (None means "absent").
_SUPPORTED_LANGS = (None, 'eng', 'esp')


def _node_text(node):
    """Return the serialized form of *node* as a native text string.

    ET.tostring() returns bytes on Python 3 and str on Python 2; decoding
    only when needed keeps message formatting working on both.
    """
    raw = ET.tostring(node)
    if not isinstance(raw, str):
        raw = raw.decode('ascii', 'replace')
    return raw


class VerifyException(Exception):
    """Raised when a node violates the expected document structure.

    Attributes:
        message: human-readable description of the violation.
        node: the element the violation was detected on.
    """

    def __init__(self, message, node):
        self.message = message
        self.node = node

    def __str__(self):
        # Show at most 200 characters of the offending node, on one line.
        snippet = _node_text(self.node)[:200].replace('\n', ' ').strip()
        return self.message + ': ' + snippet

    def __repr__(self):
        return str(self)


def is_int(text):
    """Return True if *text* is a string made only of digits."""
    return text is not None and re.match(r'^\d+$', text) is not None


def has_blank(text):
    """Return True if *text* is a string containing any whitespace."""
    return text is not None and re.search(r'\s', text) is not None


def _check_allowed_attributes(node, allowed):
    """Raise VerifyException if *node* has an attribute not in *allowed*."""
    for key in node.attrib:
        if key not in allowed:
            raise VerifyException(
                'attribute "%s" not allowed in <%s>' % (key, node.tag), node)


def _check_required_attributes(node, required):
    """Raise VerifyException if *node* lacks any attribute in *required*."""
    for key in required:
        if key not in node.attrib:
            raise VerifyException(
                'node <%s> must contain attribute "%s"' % (node.tag, key),
                node)


def _check_action_has_no_blank(node):
    """Raise VerifyException if the "action" attribute contains whitespace."""
    action = node.get('action')
    if has_blank(action):
        raise VerifyException(
            'spaces not allowed in action "%s"' % action, node)


def _check_lang(node):
    """Raise VerifyException if "lang" is present but not supported."""
    lang = node.get('lang')
    if lang not in _SUPPORTED_LANGS:
        raise VerifyException('unsupported lang "%s"' % lang, node)


def _check_children(node, child_tag, verify_child):
    """Verify every child of *node* is a <child_tag> and validate each one.

    The exception for an unexpected child is attached to the parent node.
    """
    for child in node:
        if child.tag != child_tag:
            raise VerifyException(
                'child <%s> not allowed in <%s>' % (child.tag, node.tag),
                node)
        verify_child(child)


def _check_no_stray_text(node):
    """Raise VerifyException if non-blank text sits in or after *node*."""
    if node.text is not None and node.text.strip() != '':
        raise VerifyException(
            'no text allowed directly in <%s>' % (node.tag), node)
    if node.tail is not None and node.tail.strip() != '':
        raise VerifyException(
            'no text allowed directly after <%s>' % (node.tag), node)


def verify_keyword(node):
    """Validate a <keyword> element and record its action in seen_actions.

    An empty action only produces a warning; every other violation raises
    VerifyException.
    """
    _check_allowed_attributes(node, ['action', 'lang'])
    _check_required_attributes(node, ['action'])
    action = node.get('action')
    if action.strip() == '':
        warnings.append(
            'WARNING: empty action for %s' % _node_text(node).strip())
    _check_action_has_no_blank(node)
    seen_actions[action] = True
    _check_lang(node)
    for child in node:
        # <keyword> must be a leaf element.
        raise VerifyException(
            'child <%s> not allowed in <%s>' % (child.tag, node.tag), node)


def verify_sequence(node):
    """Validate a <sequence> element and its <keyword> children.

    The "ordre" attribute must be 'strict' or 'variable'; a missing value is
    rejected as well.
    """
    _check_allowed_attributes(
        node, ['ordre', 'repetition', 'action', 'lang'])
    _check_lang(node)
    ordre = node.get('ordre')
    if ordre not in ['strict', 'variable']:
        raise VerifyException(
            'unsupported value "%s" for attribute "%s"' % (ordre, 'ordre'),
            node)
    _check_action_has_no_blank(node)
    _check_children(node, 'keyword', verify_keyword)


def verify_section(node):
    """Validate a <section> element and record its id in seen_section_ids.

    The id is mandatory, must consist of digits only, and must not repeat
    within one validation run.
    """
    _check_allowed_attributes(node, ['id', 'action'])
    _check_required_attributes(node, ['id'])
    _check_action_has_no_blank(node)
    section_id = node.get('id')
    if not is_int(section_id):
        raise VerifyException(
            'only integers allowed for section id "%s"' % section_id, node)
    if section_id in seen_section_ids:
        raise VerifyException(
            'repeated section id "%s"' % section_id, node)
    seen_section_ids[section_id] = True
    _check_children(node, 'sequence', verify_sequence)
    _check_no_stray_text(node)


def verify_liste_section(node):
    """Validate a <liste_section> element and its <section> children."""
    _check_allowed_attributes(
        node, ['sequences', 'ordre', 'repetition', 'action'])
    _check_children(node, 'section', verify_section)
    _check_no_stray_text(node)


def verify_root(node):
    """Validate the document root, which must be <homeostasis>.

    Raises:
        VerifyException: on the first structural violation found anywhere
            in the tree.
    """
    if node.tag != 'homeostasis':
        # The exception requires the offending node as second argument.
        raise VerifyException('root tag should be <homeostasis>', node)
    _check_allowed_attributes(node, ['version'])
    _check_children(node, 'liste_section', verify_liste_section)
    _check_no_stray_text(node)


def validate_xml(filename):
    """Parse and validate *filename*, returning an ``(ok, report)`` tuple.

    *filename* is handed to ET.parse() unchanged. ``ok`` is True when the
    document passed. ``report`` lists the collected warnings followed by
    either a success summary, the VerifyException text, or a traceback for
    any other error (for example a parse error).
    """
    global seen_section_ids, seen_actions, warnings
    seen_section_ids = {}
    seen_actions = {}
    warnings = []
    try:
        root = ET.parse(filename).getroot()
        verify_root(root)
    except Exception as e:
        # Top-level boundary: every failure is reported, never propagated.
        if warnings:
            warnings.append('--------------')
        if isinstance(e, VerifyException):
            detail = str(e)
        else:
            detail = traceback.format_exc()
        return (False, '\n'.join(warnings) + '\n' + detail)
    if warnings:
        warnings.append('--------------')
    summary = (
        '\nsuccessfuly validated "%s"\nfound %d sections, %d types of action'
        % (filename, len(seen_section_ids), len(seen_actions)))
    return (True, '\n'.join(warnings) + summary)


if __name__ == '__main__':
    import sys
    print(validate_xml(sys.argv[1]))