Select Git revision
plot_usecase_exampleMVML.rst
-
Dominique Benielli authoredDominique Benielli authored
Chemins.py NaN GiB
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import inspect
import getopt
import operator
import re
"""
Créé le mercredi 25 février 2015.
traitements sur les chemins
"""
def mysplit(x):
    """Split a tab-separated line into a list of stripped fields.

    A real list is returned (not a lazy ``map`` object) so that callers can
    index the fields on Python 3 as well as on Python 2.
    """
    return [field.strip() for field in x.split("\t")]
def mysub(string, a, b, winner):
    """Replace every ``a/b`` alternative in *string* by *winner*.

    An alternative is only rewritten when it is preceded by one of ``,``,
    ``(`` or ``/`` and followed by one of ``,``, ``)`` or ``/``.  Each label
    may carry a leading ``-`` sign; the sign that is kept is the one that was
    attached to *a* when ``winner == a`` and the one attached to *b*
    otherwise.

    *a* and *b* are matched literally (they are escaped before being put in
    the pattern), so the ``.`` of a label such as ``mod.comp`` only matches a
    real dot.

    Returns the rewritten string.
    """
    pattern = (
        r"(?P<start>[,(/])(?P<sign1>-?)" + re.escape(a)
        + r"/(?P<sign2>-?)" + re.escape(b)
        + r"(?P<end>[,)/]-?)"
    )
    kept_sign = "sign1" if winner == a else "sign2"

    def keep_winner(match):
        # A callable replacement inserts *winner* verbatim, with no
        # template-escape processing.
        return (match.group("start") + match.group(kept_sign)
                + winner + match.group("end"))

    return re.sub(pattern, keep_winner, string)
def removeUselessBraces(path):
    """Strip parentheses that wrap a group containing no alternative.

    A ``(...)`` group is unwrapped when its content holds no ``/``, ``^``,
    ``(`` or ``)`` and when it is surrounded on both sides by one of ``(``,
    ``)`` or ``,``.  The substitution is repeated until a pass changes
    nothing, because unwrapping one group can expose another.

    Returns the simplified path.
    """
    useless = re.compile(
        r"(?P<left>[)(,])\((?P<deps>[^/^(^)]*)\)(?P<right>[,)(])"
    )
    while True:
        path, replaced = useless.subn(r"\g<left>\g<deps>\g<right>", path)
        if replaced == 0:
            return path
def keepNumerusSuj(a, b):
    """Pick the alternative that contains the most occurrences of "suj".

    When both contain the same number of occurrences, neither is dropped:
    the two are returned joined by ")/(" so the alternative is preserved.
    """
    suj_in_a = a.count("suj")
    suj_in_b = b.count("suj")
    if suj_in_a == suj_in_b:
        return a + ')/(' + b
    return a if suj_in_a > suj_in_b else b
def keepOneIfSame(a, b):
    """Collapse two identical alternatives into one.

    Different alternatives are both kept, joined by ")/(".
    """
    if a != b:
        return a + ')/(' + b
    return a
# Only works on alternatives of length 1.
def arbitratePath(path):
    """Resolve the ``a/b`` label alternatives of *path* by priority rules.

    The rules below are applied over and over until a complete pass leaves
    the path unchanged; the resulting path is returned.

    Priority between two competing labels (the leftmost wins):
    argc < suj < (obj / ats / ato) < (a_obj / de_obj / p_obj)
         < mod* / dep < __joker__

    Notes kept from the original author:
    * for suj/obj, keeping suj is the best heuristic given the constraints;
    * mod.comp / mod.super is a bug in the deep data: always keep mod.comp,
      which is the more frequent one;
    * for ambiguities on longer paths, preferring the path that holds the
      largest number of suj is enough to get the desired behaviour.
    """
    # Ordered tuple (not a set) so the order in which the rewrite rules are
    # applied is the same on every run and every interpreter.
    all_deps = (
        "argc", "suj", "obj", "ats", "ato", "a_obj", "de_obj", "p_obj",
        "mod.rel", "mod", "mod.comp", "mod.super", "mod.inc", "dep",
    )
    # Labels of one tier are never arbitrated against each other; each of
    # them wins against every label that belongs to no earlier or same tier.
    tiers = (
        ("argc",),
        ("suj",),
        ("obj", "ats", "ato"),
        ("a_obj", "de_obj", "p_obj"),
    )
    # Alternatives between two parenthesised groups, "(left)/(right)".
    suj_on_right = re.compile(
        r'([\(])([^/^(^)]*)(\)\/\()([^/^)]*suj[^/^)]*)([\)])')
    suj_on_left = re.compile(
        r'([\(])([^/^(^)]*suj[^/^(^)]*)(\)\/\()([^/^)]*)([\)])')
    any_groups = re.compile(
        r'([\(])([^/^(^)]*)(\)\/\()([^/^)]*)([\)])')

    def most_suj(match):
        return (match.group(1)
                + keepNumerusSuj(match.group(2), match.group(4))
                + match.group(5))

    def same_only(match):
        return (match.group(1)
                + keepOneIfSame(match.group(2), match.group(4))
                + match.group(5))

    oldpath = path
    while True:
        # "__joker__" loses against every real label.
        joker = "__joker__"
        for dep in all_deps:
            path = mysub(path, min(joker, dep), max(joker, dep), dep)

        remaining = list(all_deps)
        for tier in tiers:
            remaining = [dep for dep in remaining if dep not in tier]
            for current in tier:
                for dep in remaining:
                    # mysub expects the pair in lexicographic order.
                    path = mysub(path, min(current, dep), max(current, dep),
                                 current)

        path = mysub(path, "mod", "mod.rel", "mod.rel")
        path = mysub(path, "mod.comp", "mod.super", "mod.comp")

        # Longer alternatives: prefer the side with more "suj", then merge
        # the alternatives whose two sides are identical.
        path = suj_on_right.sub(most_suj, path)
        path = suj_on_left.sub(most_suj, path)
        path = any_groups.sub(same_only, path)

        path = removeUselessBraces(path)
        if path == oldpath:
            return path
        oldpath = path
def arbitratePathesInFile(stream_in, stream_out):
    """Arbitrate every line read from *stream_in* and write it to *stream_out*.

    Each line is passed unchanged (line ending included) to arbitratePath and
    the result is written as is; nothing is returned.
    """
    for raw_path in stream_in:
        resolved = arbitratePath(raw_path)
        stream_out.write(resolved)
def deepestSlash(path):
    """Return the index of the '/' to expand first, or -1 if there is none.

    Despite the name, the slash that is selected is the one with the
    SMALLEST parenthesis nesting depth (opening minus closing parentheses
    seen so far); when several slashes share that depth the first one wins.
    """
    depth = 0
    best_depth = len(path)  # larger than any depth a slash can have
    best_index = -1
    for index, char in enumerate(path):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        elif char == '/' and depth < best_depth:
            best_depth = depth
            best_index = index
    return best_index
def cleanPath(path):
    """Drop every parenthesis of *path* and wrap the result in one pair."""
    flattened = "".join(char for char in path if char not in ("(", ")"))
    return "(" + flattened + ")"
def expandPath(path):
    """Expand a path holding '/' alternatives into all alternative-free paths.

    The slash chosen by deepestSlash is split into two candidate paths, one
    keeping the left alternative and one keeping the right alternative; the
    candidates are expanded in turn until no slash is left.  Every resulting
    path is normalised with cleanPath.

    Example: "(a/b,c)" gives ["(a,c)", "(b,c)"].

    An empty *path* yields ["()"] instead of raising IndexError.
    """
    pathes = [path]
    position = 0
    # Explicit worklist: new candidates are appended while the list is being
    # walked, so an index is used rather than a ``for`` over the list.
    while position < len(pathes):
        p = pathes[position]
        position += 1
        c = deepestSlash(p)
        if c < 0:
            # No alternative left in this candidate.
            continue
        if p[c - 1] == ')':
            # "(...)/(...)": find the '(' matching the ')' just before the
            # slash by walking left until the parentheses balance.
            left = c - 1
            closing, opening = 1, 0
            while closing != opening:
                left -= 1
                if p[left] == ')':
                    closing += 1
                elif p[left] == '(':
                    opening += 1
            # Then the ')' matching the group right after the slash.
            # NOTE(review): assumes p[c + 1] is '(' -- confirm with callers.
            right = c + 1
            closing, opening = 0, 1
            while closing != opening:
                right += 1
                if p[right] == ')':
                    closing += 1
                elif p[right] == '(':
                    opening += 1
        else:
            # "label/label": each alternative is a bare label delimited by
            # ',' or a parenthesis.
            left = c - 1
            while p[left - 1] not in ',(':
                left -= 1
            right = c + 1
            while p[right + 1] not in ',)':
                right += 1
        # Keep the left alternative (drop the slash and the right one) ...
        pathes.append(p[0:c] + p[right + 1:])
        # ... and keep the right alternative (drop the left one and the slash).
        pathes.append(p[0:left] + p[c + 1:])
    return [cleanPath(p) for p in pathes if '/' not in p]
def surfaceDependency(govs, labels):
    """Return the first surface dependency as a ``(governor, label)`` pair.

    *govs* and *labels* are "|"-separated parallel lists.  Entries whose
    label starts with "D:" or "I:" are skipped; for the first remaining
    entry, the part after the last ":" of the governor and of the label is
    returned.

    When no such entry exists a message is written to stderr and None is
    returned.
    """
    govs = govs.split("|")
    labels = labels.split("|")
    for i, gov in enumerate(govs):
        label = labels[i]
        if not label.startswith(("D:", "I:")):
            return gov.split(":")[-1], label.split(":")[-1]
    # Works on both Python 2 and Python 3, unlike ``print >>sys.stderr``.
    sys.stderr.write("pas de sytaxe de surface! %s\n" % (labels,))
    return None
def deepDependencies(govs, labels):
    """Return the deep dependencies as ``(governors, labels)`` lists.

    *govs* and *labels* are "|"-separated parallel lists.  Entries whose
    label starts with "I:" or "S:" are skipped; for the others, the part
    after the last ":" of the governor and of the label is collected.
    """
    govs = govs.split("|")
    labels = labels.split("|")
    deepGovs = []
    deepLabels = []
    for i, gov in enumerate(govs):
        label = labels[i]
        if label.startswith(("I:", "S:")):
            continue
        deepGovs.append(gov.split(":")[-1])
        deepLabels.append(label.split(":")[-1])
    return deepGovs, deepLabels
def findPath(FE, T, ancestry, phrase, splitted=False, govCol=4, labelCol=5, onlyDeep=False, graph=None):
    """Return the dependency path between FE and T as a string.

    Dispatches to deepFindPath (which needs *graph*) when *onlyDeep* is
    true, and to deepNSurfaceFindPath otherwise.
    """
    if not onlyDeep:
        return deepNSurfaceFindPath(FE, T, ancestry, phrase, splitted, govCol, labelCol)
    return deepFindPath(FE, T, ancestry, phrase, graph, splitted, govCol, labelCol)
def deepFindPath(FE, T, ancestry, phrase, graph, splitted=False, govCol=4, labelCol=5):
    """Return the string form of the path stored on the vertex FE of *graph*.

    The graph must already have been built by the caller.  When T is not the
    graph's current root, myBFS.BFS(graph, T) is run first (presumably to
    recompute the paths from T -- confirm in myBFS).  The parameters
    ancestry, phrase, splitted, govCol and labelCol are not used here.
    """
    root_differs = T != graph.root
    if root_differs:
        from myBFS import BFS
        BFS(graph, T)
    target_vertex = graph.get_vertex(FE)
    return target_vertex.pathToString()
def _collectSurfaceAncestry(node, ancestry, phrase, treatment, govCol, labelCol):
    """Make sure ancestry[node] holds the surface ancestors of *node*.

    The list goes from the top of the tree down to *node*'s own
    ``(governor, label)`` pair.  An already cached ancestor list is reused
    as soon as one is met on the way up.  Returns False when a token met on
    the way up is missing from *phrase*, True otherwise.
    """
    if node in ancestry:
        return True
    ancestry[node] = []
    q = node
    while q != '0':
        if q != node and q in ancestry:
            # Reuse the chain already computed for this ancestor.
            ancestry[node] = ancestry[q] + ancestry[node]
            break
        try:
            row = phrase[q]
        except KeyError:
            # NOTE(review): the partially built ancestry[node] stays in the
            # cache, as in the original code -- confirm this is intended.
            return False
        s = treatment(row)
        gov, label = surfaceDependency(s[govCol], s[labelCol])
        ancestry[node].insert(0, (gov, label.split(":")[-1]))
        q = gov
    return True


def deepNSurfaceFindPath(FE, T, ancestry, phrase, splitted=False, govCol=4, labelCol=5):
    """Return the dependency path between T and FE as a "(...)" string.

    *phrase* maps a token id to its row: a list of columns when *splitted*
    is true, a tab-separated string otherwise.  *ancestry* is a cache
    (token id -> list of ``(governor, label)`` pairs) that is filled in
    place and can be shared between calls.

    * If a token needed to build the surface ancestry is missing from
      *phrase*, a message is written to stderr and '(invalid)' is returned.
    * If T is a deep governor of FE, or FE a deep governor of T, the label
      of that single dependency is returned as "(label)".
    * Otherwise the surface labels are listed from T up to the point where
      the two ancestries diverge (each prefixed with "-"), followed by the
      labels going down to FE, joined by ",".
    """
    if splitted:
        def treatment(row):
            return row
    else:
        def treatment(row):
            # list() keeps the columns indexable whatever mysplit returns.
            return list(mysplit(row))

    def treatlabel(label):
        return label.split(":")[-1]

    for node in (FE, T):
        if not _collectSurfaceAncestry(node, ancestry, phrase, treatment,
                                       govCol, labelCol):
            # Same text as the former ``print >> sys.stderr`` statement.
            sys.stderr.write(" ".join(
                ['Inconsistance dans la phrase: ', str(phrase), str(FE), str(T)]
            ) + "\n")
            return '(invalid)'

    ancestryT = list(ancestry[T])
    ancestryFE = list(ancestry[FE])

    # A direct deep dependency between the two tokens short-circuits the
    # surface path.
    s = treatment(phrase[FE])
    govs, labels = deepDependencies(s[govCol], s[labelCol])
    if T in govs:
        return "(" + treatlabel(labels[govs.index(T)]) + ")"
    s = treatment(phrase[T])
    govs, labels = deepDependencies(s[govCol], s[labelCol])
    if FE in govs:
        return "(" + treatlabel(labels[govs.index(FE)]) + ")"

    # Length of the prefix shared by both ancestries.
    common = 0
    while (common < len(ancestryFE) and common < len(ancestryT)
           and ancestryT[common] == ancestryFE[common]):
        common += 1
    if common == len(ancestryT) == len(ancestryFE):
        # Identical ancestries: keep the last shared step in the path.
        common -= 1

    labelsT = [entry[1] for entry in ancestryT]
    labelsFE = [entry[1] for entry in ancestryFE]
    # Going up from T: the non-shared labels, bottom-up, marked with "-".
    upward = ["-" + label for label in reversed(labelsT[common:])]
    downward = labelsFE[common:]
    return "(" + ",".join(upward + downward) + ")"
def HasRoot(sent, labeldepCol=5, rootLabel='ROOT'):
    """Tell whether at least one row of *sent* carries the root label.

    *sent* maps keys to indexable rows; column *labeldepCol* of each row is
    compared with *rootLabel*.
    """
    return any(sent[token][labeldepCol] == rootLabel for token in sent)
def CountRoots(sent, labeldepCol=5, rootLabel='ROOT'):
    """Count the rows of *sent* whose column *labeldepCol* equals *rootLabel*."""
    return sum(1 for token in sent if sent[token][labeldepCol] == rootLabel)
def HasCycle(sent, headCol=4):
    """Tell whether following the heads from some token never reaches a root.

    From every key of *sent*, the head found in column *headCol* is followed
    at most len(sent) times.  If the walk has not arrived at "0" or "_" by
    then, the token is considered to be part of (or to lead into) a cycle
    and True is returned.
    """
    terminals = ("0", '_')
    max_steps = len(sent)
    for start in sent:
        node = start
        for _ in range(max_steps):
            if node in terminals:
                break
            node = sent[node][headCol]
        if node not in terminals:
            return True
    return False
if __name__ == "__main__":
    # This module is a library: when run directly it only reports that on
    # stderr.  sys.stderr.write works on both Python 2 and Python 3, whereas
    # the ``print >>sys.stderr`` statement fails on Python 3.
    sys.stderr.write("%s ne peut être utilisé en standalone\n" % sys.argv[0])