Skip to content
Snippets Groups Projects
Select Git revision
  • d2e352c6de4228606a5a0e688b6a0904a92ca4fd
  • master default protected
  • develop
  • 0.0.3
  • 0.0.2
  • 0.0.1
  • 0.0.0
7 results

plot_usecase_exampleMVML.rst

Blame
  • Chemins.py NaN GiB
    #!/usr/bin/python 
    # -*- coding: utf-8 -*-
    
    from __future__ import division 
    import sys 
    import inspect 
    import getopt 
    import operator 
    import re
    """
     Créé le mercredi 25 février 2015.
    
     traitements sur les chemins
    
    """
    def mysplit(x):
        """Split a tab-separated line into a list of stripped fields.

        x -- the line to split (a string).

        A real list is returned rather than the result of ``map``: callers
        index the result by column, which a lazy ``map`` object (Python 3)
        does not support.  On Python 2 the result is unchanged.
        """
        return [field.strip() for field in x.split("\t")]
    
    def mysub(string,a,b,winner):
        """Replace every alternative ``a/b`` found in ``string`` by ``winner``.

        string -- path string in which alternatives are written ``a/b``.
        a, b   -- the two labels, in the order they must appear in the text.
        winner -- the label that replaces the alternative.

        An alternative is only matched when it is preceded by one of ``,(/``
        and followed by one of ``,)/``.  Each label may carry a leading ``-``
        sign; the sign kept in the result is the one that was attached to
        ``a`` when ``winner == a`` and the one attached to ``b`` otherwise.

        The labels are matched literally: they are escaped before being put in
        the pattern, so that the ``.`` of labels such as ``mod.rel`` does not
        match an arbitrary character.
        """
        # Name of the group holding the sign to keep.
        kept_sign="sign1" if winner==a else "sign2"
        pattern=(r"(?P<start>[,(/])(?P<sign1>-?)"+re.escape(a)
                 +r"/(?P<sign2>-?)"+re.escape(b)+r"(?P<end>[,)/]-?)")

        def _replace(match):
            # A callable is used so that ``winner`` is inserted verbatim,
            # without being interpreted as a replacement template.
            return match.group("start")+match.group(kept_sign)+winner+match.group("end")

        return re.sub(pattern,_replace,string)
    
    
    def removeUselessBraces(path):
        """Strip redundant parentheses from ``path`` and return the result.

        A parenthesised group is removed when its content holds no ``/``,
        ``^``, ``(`` or ``)``, and when it is preceded by one of ``)(,`` and
        followed by one of ``,)(``.  The substitution is repeated until a pass
        changes nothing, because matches cannot overlap: two adjacent groups
        sharing a delimiter need two passes.
        """
        pattern=r"(?P<left>[)(,])\((?P<deps>[^/^(^)]*)\)(?P<right>[,)(])"
        replacement=r"\g<left>\g<deps>\g<right>"
        while True:
            path,nb_subs=re.subn(pattern,replacement,path)
            if nb_subs==0:
                return path
    
    def keepNumerusSuj(a,b):
        """Pick, between two alternatives, the one mentioning "suj" most often.

        When both contain "suj" the same number of times, no choice is made
        and the two are returned joined by ")/(" (i.e. still an alternative).
        """
        suj_in_a,suj_in_b=a.count("suj"),b.count("suj")
        if suj_in_a==suj_in_b:
            return a+')/('+b
        return a if suj_in_a>suj_in_b else b
    
    def keepOneIfSame(a,b):
        """Collapse two identical alternatives into one.

        Returns ``a`` when both are equal; otherwise returns the two joined by
        ")/(" so that they remain an alternative.
        """
        return a if a==b else a+')/('+b
    
    # Only works on a length of 1 (original author's note; presumably this
    # refers to alternatives made of a single label -- TODO confirm).
    def arbitratePath(path):
        """Resolve the alternatives (``a/b``) of a path string by priority rules.

        The whole set of rules below is applied again and again until a full
        pass leaves the string unchanged; that stable string is returned.

        path -- the path string; alternatives are written with '/', labels
                are separated by ',' and grouped with parentheses (this is the
                syntax matched by mysub() and by the patterns used here).
        """
        # order: suj < obj < ats / ato < a_obj / de_obj / p_obj < mod*
        # argc < suj < (obj / ats / ato) < (a_obj / de_obj / p_obj) < mod* / dep < __joker__
        # for suj/obj, given your constraints => the heuristic of keeping suj is the best solution
        # for mod.comp / mod.super => this is a bug in the deep data, I would say to always keep mod.comp, which is more frequent
        # for ambiguities on longer paths, favouring the path with the largest number of suj is enough to get the desired behaviour
        oldpath=path
        while 1:
            # Rebuilt on every pass because the phases below remove labels from it.
            # NOTE(review): this is a set, so the order in which labels are
            # tried inside each phase is not fixed by this code.
            alldeps=set(["argc","suj","obj","ats","ato","a_obj","de_obj","p_obj","mod.rel","mod","mod.comp","mod.super","mod.inc","dep"])
            
            # An alternative between __joker__ and a known label is resolved
            # in favour of the label (the winner passed to mysub is dep).
            current="__joker__"
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),dep)
    
            # argc wins against every other label; min/max put the two labels
            # in lexicographic order, which is the order mysub looks for.
            current="argc"
            alldeps.remove(current)
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            # suj wins against every label still in alldeps.
            current="suj"
            alldeps.remove(current)        
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            # obj, ats and ato are removed together: each of them wins against
            # the labels left in alldeps, but they are not arbitrated against
            # each other.
            current="obj"
            alldeps.remove(current)        
            alldeps.remove("ats")
            alldeps.remove("ato")
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            current="ats"
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            current="ato"
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
            
            # Same scheme for a_obj, de_obj and p_obj: removed together, each
            # wins against what remains in alldeps.
            current="a_obj"
            alldeps.remove(current)
            alldeps.remove("de_obj")
            alldeps.remove("p_obj")
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
                
            current="de_obj"
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            current="p_obj"
            for dep in alldeps:
                path=mysub(path,min(current,dep),max(current,dep),current)
    
            # Two explicit rules among the mod* labels.
            path=mysub(path,"mod","mod.rel","mod.rel")
            path=mysub(path,"mod.comp","mod.super","mod.comp")
            
            # Alternatives between two parenthesised groups, "(X)/(Y)", where
            # one side contains "suj": keepNumerusSuj keeps the side with more
            # "suj" (and rebuilds the alternative unchanged on a tie).  The
            # first pattern requires "suj" on the right side, the second on
            # the left side.
            path=re.sub('([\(])([^/^(^)]*)(\)\/\()([^/^)]*suj[^/^)]*)([\)])',lambda match: match.group(1)+keepNumerusSuj(match.group(2),match.group(4))+match.group(5),path)
            path=re.sub('([\(])([^/^(^)]*suj[^/^(^)]*)(\)\/\()([^/^)]*)([\)])',lambda match: match.group(1)+keepNumerusSuj(match.group(2),match.group(4))+match.group(5),path)
    
            # "(X)/(X)": keep a single copy when both sides are identical.
            path=re.sub('([\(])([^/^(^)]*)(\)\/\()([^/^)]*)([\)])',lambda match: match.group(1)+keepOneIfSame(match.group(2),match.group(4))+match.group(5),path)
    
            # path=mysub(path,"","","")
            # Drop the parentheses made redundant by the substitutions above.
            path=removeUselessBraces(path)
            # Fixed point reached: this pass changed nothing.
            if path==oldpath:
                return path
            oldpath=path
    
    
    def arbitratePathesInFile(stream_in,stream_out):
        """Arbitrate every line of ``stream_in`` and write it to ``stream_out``.

        stream_in  -- iterable of lines, each one a path string.
        stream_out -- object with a ``write`` method; it receives the result
                      of arbitratePath() for each input line, in order.
        """
        emit=stream_out.write
        for raw_path in stream_in:
            emit(arbitratePath(raw_path))
    
    
    def deepestSlash(path):
        """Return the index of the '/' with the smallest parenthesis depth.

        Despite the name, the slash selected is the least nested one (depth
        being the number of '(' minus the number of ')' seen before it); when
        several slashes share that depth the first one is returned.

        Returns -1 when ``path`` contains no '/'.
        """
        depth=0
        best_depth=len(path)
        best_index=-1
        for index,char in enumerate(path):
            if char=='(':
                depth+=1
            elif char==')':
                depth-=1
            elif char=='/' and depth<best_depth:
                best_depth=depth
                best_index=index
        return best_index
    
    def cleanPath(path):
        """Return ``path`` with all its parentheses removed, wrapped in one pair."""
        bare="".join(char for char in path if char not in "()")
        return "("+bare+")"
    
    def expandPath(path):
        """Expand a path containing '/' alternatives into several plain paths.

        path -- path string in which alternatives are written ``x/y`` (between
                labels) or ``(x)/(y)`` (between parenthesised groups).

        Returns the list of alternative-free paths, each one normalised by
        cleanPath().  A path without any '/' (including the empty string)
        yields a single-element list.
        """
        # Work list: it grows while being iterated, every path that still
        # holds a '/' contributes its two halves, which are examined later.
        pathes=[path]
        for p in pathes:
            c=deepestSlash(p)
            if c<0:
                # No alternative left in this path (also covers the empty
                # string, which cannot be indexed).
                continue
            if p[c-1]==')':
                # "(left)/(right)": walk left to the '(' matching the ')'
                # just before the slash ...
                left=c-1
                rb=1
                lb=0
                while rb!=lb:
                    left-=1
                    if p[left]==')': rb+=1
                    elif p[left]=='(': lb+=1
                # ... and right to the ')' matching the '(' just after it.
                right=c+1
                rb=0
                lb=1
                while rb!=lb:
                    right+=1
                    if p[right]==')': rb+=1
                    elif p[right]=='(': lb+=1
            else:
                # "left/right" between bare labels: the left operand starts
                # after the previous ',' or '(', the right one ends before
                # the next ',' or ')'.
                left=c-1
                while p[left-1] not in ',(':
                    left-=1
                right=c+1
                while p[right+1] not in ',)':
                    right+=1

            # First variant keeps the left operand, second keeps the right one.
            pathes.append(p[0:c]+p[right+1:])
            pathes.append(p[0:left]+p[c+1:])
        return [cleanPath(p) for p in pathes if '/' not in p]
        
    
    def surfaceDependency(govs,labels):
        """Return the first surface (governor, label) pair of a token.

        govs   -- '|'-separated governors.
        labels -- '|'-separated labels, parallel to ``govs``.

        An entry counts as a surface dependency when its label starts with
        neither "D:" nor "I:".  Only the text after the last ':' of the
        governor and of the label is returned.

        Returns a ``(governor, label)`` tuple, or None when no entry qualifies
        (a message is then written to stderr).  Callers that unpack the result
        must be prepared for that None.
        """
        govs=govs.split("|")
        labels=labels.split("|")
        for i,gov in enumerate(govs):
            label=labels[i]
            if not label.startswith(("D:","I:")):
                return gov.split(":")[-1],label.split(":")[-1]
        # Same output as the former print statement, but valid on Python 2 and 3.
        sys.stderr.write("pas de sytaxe de surface! %s\n" % (labels,))
    
    def deepDependencies(govs,labels):
        """Return the deep governors and labels of a token.

        govs   -- '|'-separated governors.
        labels -- '|'-separated labels, parallel to ``govs``.

        An entry is kept when its label starts with neither "I:" nor "S:".
        Only the text after the last ':' of each governor and label is kept.

        Returns a pair of parallel lists ``(governors, labels)``; both are
        empty when no entry qualifies.
        """
        govs=govs.split("|")
        labels=labels.split("|")
        deepGovs=[]
        deepLabels=[]
        # enumerate() replaces xrange(), which does not exist on Python 3.
        for i,gov in enumerate(govs):
            label=labels[i]
            if label.startswith(("I:","S:")):
                continue
            deepGovs.append(gov.split(":")[-1])
            deepLabels.append(label.split(":")[-1])
        return deepGovs,deepLabels
    
    def findPath(FE,T,ancestry,phrase,splitted=False,govCol=4,labelCol=5,onlyDeep=False,graph=None):
        """Compute the path between ``FE`` and ``T``, choosing the strategy.

        With ``onlyDeep`` set, the work is delegated to deepFindPath(), which
        needs ``graph``; otherwise to deepNSurfaceFindPath().  All other
        arguments are forwarded unchanged.
        """
        if not onlyDeep:
            return deepNSurfaceFindPath(FE,T,ancestry,phrase,splitted,govCol,labelCol)
        return deepFindPath(FE,T,ancestry,phrase,graph,splitted,govCol,labelCol)
    
    def deepFindPath(FE,T,ancestry,phrase,graph,splitted=False,govCol=4,labelCol=5):
        """Return the path string stored on the vertex ``FE`` of ``graph``.

        The graph must already have been built.  When ``T`` is not its current
        root, BFS() from the myBFS module is run on the graph from ``T``
        first.  ``ancestry``, ``phrase``, ``splitted``, ``govCol`` and
        ``labelCol`` are accepted but not used here.
        """
        needs_traversal=(T!=graph.root)
        if needs_traversal:
            # Imported lazily, only when a traversal is actually required.
            from myBFS import BFS
            BFS(graph,T)

        vertex=graph.get_vertex(FE)
        return vertex.pathToString()
    
    def deepNSurfaceFindPath(FE,T,ancestry,phrase,splitted=False,govCol=4,labelCol=5,):
        """Return the dependency path between ``T`` and ``FE`` as a string.

        FE, T     -- keys of ``phrase`` (the ascent towards the top stops on
                     the governor '0').  Presumably "frame element" and
                     "target" -- TODO confirm.
        ancestry  -- dict used as a cache, filled in place: key -> list of
                     (governor, label) pairs ordered from the top of the
                     sentence down to the key.
        phrase    -- mapping key -> row; a row is a tab-separated string, or
                     an already indexable sequence when ``splitted`` is true.
        govCol, labelCol -- indexes of the governor and label columns of a row.

        Returns "(label)" when one of the two nodes is a deep governor of the
        other, '(invalid)' when a key met while climbing is missing from
        ``phrase``, and otherwise "(-l1,-l2,...,m1,m2,...)": the labels going
        up from ``T`` (prefixed with '-') followed by the labels going down
        to ``FE``.
        """
        # Rows are used as-is when already split, split on tabs otherwise.
        treatment=(lambda x: x) if splitted else (lambda x: mysplit(x))
        # Keep only the text after the last ':' of a label.
        treatlabel=(lambda x: x.split(":")[-1])
       
        # Build (and cache) the surface ancestry of FE by climbing from FE to
        # the governor '0'.
        if FE not in ancestry.keys():
            ancestry[FE]=[]
            q=FE
            while q != '0' :
                # An ancestor whose ancestry is already cached: reuse it as
                # the upper part and stop climbing.
                if q!=FE and q in ancestry.keys():
                    ancestry[FE]=ancestry[q]+ancestry[FE]
                    break
                try:
                    s=treatment(phrase[q])
                except KeyError:
                    # NOTE(review): the partially built ancestry[FE] stays in
                    # the cache when returning here.
                    print >> sys.stderr, 'Inconsistance dans la phrase: ',phrase, FE, T
                    return '(invalid)'
                gov,label=surfaceDependency(s[govCol],s[labelCol])
                # Inserting at the front keeps the list ordered top -> node.
                ancestry[FE].insert(0,(gov,treatlabel(label)))
                q=gov
        # Same construction for T.
        if T not in ancestry.keys():
            ancestry[T]=[]
            q=T
            while q != '0' :
                if q!=T and q in ancestry.keys():
                    ancestry[T]=ancestry[q]+ancestry[T]
                    break
                try:
                    s=treatment(phrase[q])
                except KeyError:
                    print >> sys.stderr, 'Inconsistance dans la phrase: ',phrase, FE, T
                    return '(invalid)'
                gov,label=surfaceDependency(s[govCol],s[labelCol])
                ancestry[T].insert(0,(gov,(treatlabel(label))))
                q=gov
    
        # Work on copies so that the cached lists are not modified below.
        ancestryT=list(ancestry[T])
        ancestryFE=list(ancestry[FE])
    
        #print >>sys.stderr, FE, phrase
        # Direct deep dependency: T is one of the deep governors of FE ...
        s=treatment(phrase[FE])
        govs,labels=deepDependencies(s[govCol],s[labelCol])
        if T in govs:
            return "("+treatlabel(labels[govs.index(T)])+")"
        
        # ... or FE is one of the deep governors of T.
        s=treatment(phrase[T])
        govs,labels=deepDependencies(s[govCol],s[labelCol])
        if FE in govs:
            return "("+treatlabel(labels[govs.index(FE)])  +")"  
    
        # Length of the prefix shared by the two ancestries.
        q=0
        while q < len(ancestryFE) and q < len(ancestryT) and ancestryT[q]==ancestryFE[q] :
            q+=1
    
        # Identical ancestries: keep the last shared step so that the path is
        # not empty.
        if q==len(ancestryT)==len(ancestryFE):
            q-=1
    
    
        # Keep the labels only.
        ancestryT=list(map(lambda x: x[1],ancestryT))
        ancestryFE=list(map(lambda x: x[1],ancestryFE))
        # Upward part: the non-shared labels of T, read from T towards the
        # top, each marked with '-'.
        ancestryT.reverse()
        ancestryT=ancestryT[:len(ancestryT)-q]
        ancestryT=["-"+ancestryT[i] for i in range(len(ancestryT))]
        # Downward part: the non-shared labels of FE, read towards FE.
        ancestryFE=ancestryFE[q:]
         
        return "("+",".join(ancestryT+ancestryFE)+")"
    
    def HasRoot(sent,labeldepCol=5,rootLabel='ROOT'):
        """Tell whether at least one row of ``sent`` carries the root label.

        sent        -- mapping key -> row (an indexable sequence of columns).
        labeldepCol -- index of the column holding the dependency label.
        rootLabel   -- label value identifying a root.
        """
        return any(sent[key][labeldepCol]==rootLabel for key in sent)
    
    def CountRoots(sent,labeldepCol=5,rootLabel='ROOT'):
        """Count the rows of ``sent`` whose label column equals ``rootLabel``.

        sent        -- mapping key -> row (an indexable sequence of columns).
        labeldepCol -- index of the column holding the dependency label.
        rootLabel   -- label value identifying a root.
        """
        return sum(1 for key in sent if sent[key][labeldepCol]==rootLabel)
    
    def HasCycle(sent,headCol=4):
        """Tell whether following the heads from some row never reaches the top.

        sent    -- mapping key -> row (an indexable sequence of columns).
        headCol -- index of the column holding the key of the head.

        From every key, the head links are followed for at most ``len(sent)``
        steps; a walk that has not reached a head equal to "0" or '_' by then
        is reported as a cycle.
        """
        terminals=("0",'_')
        for start in sent:
            node=start
            for _ in range(len(sent)):
                if node in terminals:
                    break
                node=sent[node][headCol]

            # Still inside the sentence after the maximum number of steps.
            if node not in terminals:
                return True

        return False
    
    
        
    if __name__ == "__main__":
        # This module is a library: when run as a script it only reports, on
        # stderr, that it cannot be used standalone.  sys.stderr.write() gives
        # the same output as the former print statement and also works on
        # Python 3.
        sys.stderr.write("%s ne peut être utilisé en standalone\n" % sys.argv[0])