Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
RL-Parsing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Franck Dary
RL-Parsing
Commits
272c4c4d
Commit
272c4c4d
authored
Jul 19, 2021
by
Franck Dary
Browse files
Options
Downloads
Patches
Plain Diff
Added erased status
parent
19ec0711
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
Config.py
+4
-3
4 additions, 3 deletions
Config.py
Dicts.py
+11
-9
11 additions, 9 deletions
Dicts.py
Features.py
+5
-1
5 additions, 1 deletion
Features.py
Transition.py
+17
-16
17 additions, 16 deletions
Transition.py
with
37 additions
and
29 deletions
Config.py
+
4
−
3
View file @
272c4c4d
from
readMCD
import
readMCD
from
readMCD
import
readMCD
import
Dicts
import
sys
import
sys
################################################################################
################################################################################
...
@@ -103,7 +104,7 @@ class Config :
...
@@ -103,7 +104,7 @@ class Config :
value
=
str
(
self
.
getAsFeature
(
lineIndex
,
self
.
index2col
[
colIndex
]))
value
=
str
(
self
.
getAsFeature
(
lineIndex
,
self
.
index2col
[
colIndex
]))
if
value
==
""
:
if
value
==
""
:
value
=
"
_
"
value
=
"
_
"
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
!=
"
-1
"
:
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
!=
"
-1
"
and
value
!=
Dicts
.
Dicts
.
erased
:
value
=
self
.
getAsFeature
(
int
(
value
),
"
ID
"
)
value
=
self
.
getAsFeature
(
int
(
value
),
"
ID
"
)
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
==
"
-1
"
:
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
==
"
-1
"
:
value
=
"
0
"
value
=
"
0
"
...
@@ -126,9 +127,9 @@ class Config :
...
@@ -126,9 +127,9 @@ class Config :
value
=
str
(
self
.
getAsFeature
(
index
,
self
.
index2col
[
colIndex
]))
value
=
str
(
self
.
getAsFeature
(
index
,
self
.
index2col
[
colIndex
]))
if
value
==
""
or
value
==
"
_
"
:
if
value
==
""
or
value
==
"
_
"
:
value
=
"
_
"
value
=
"
_
"
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
!=
"
-1
"
:
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
!=
"
-1
"
and
value
!=
Dicts
.
Dicts
.
erased
:
value
=
self
.
getAsFeature
(
int
(
value
),
"
ID
"
)
value
=
self
.
getAsFeature
(
int
(
value
),
"
ID
"
)
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
==
"
-1
"
:
elif
self
.
index2col
[
colIndex
]
==
"
HEAD
"
and
value
==
"
-1
"
and
value
!=
Dicts
.
Dicts
.
erased
:
value
=
"
0
"
value
=
"
0
"
toPrint
.
append
(
value
)
toPrint
.
append
(
value
)
print
(
"
\t
"
.
join
(
toPrint
),
file
=
output
)
print
(
"
\t
"
.
join
(
toPrint
),
file
=
output
)
...
...
This diff is collapsed.
Click to expand it.
Dicts.py
+
11
−
9
View file @
272c4c4d
...
@@ -3,16 +3,18 @@ from readMCD import readMCD
...
@@ -3,16 +3,18 @@ from readMCD import readMCD
################################################################################
################################################################################
class
Dicts
:
class
Dicts
:
unkToken
=
"
__unknown__
"
nullToken
=
"
__null__
"
noStackToken
=
"
__nostack__
"
oobToken
=
"
__oob__
"
noDepLeft
=
"
__nodepleft__
"
noDepRight
=
"
__nodepright__
"
noGov
=
"
__nogov__
"
notSeen
=
"
__notseen__
"
erased
=
"
__erased__
"
def
__init__
(
self
)
:
def
__init__
(
self
)
:
self
.
dicts
=
{}
self
.
dicts
=
{}
self
.
unkToken
=
"
__unknown__
"
self
.
nullToken
=
"
__null__
"
self
.
noStackToken
=
"
__nostack__
"
self
.
oobToken
=
"
__oob__
"
self
.
noDepLeft
=
"
__nodepleft__
"
self
.
noDepRight
=
"
__nodepright__
"
self
.
noGov
=
"
__nogov__
"
self
.
notSeen
=
"
__notseen__
"
def
addDict
(
self
,
name
,
d
)
:
def
addDict
(
self
,
name
,
d
)
:
if
name
in
self
.
dicts
:
if
name
in
self
.
dicts
:
...
@@ -39,7 +41,7 @@ class Dicts :
...
@@ -39,7 +41,7 @@ class Dicts :
targetColumns
=
list
(
col2index
.
keys
())
targetColumns
=
list
(
col2index
.
keys
())
else
:
else
:
targetColumns
=
list
(
colsSet
)
targetColumns
=
list
(
colsSet
)
self
.
dicts
=
{
col
:
{
self
.
unkToken
:
(
0
,
minCount
),
self
.
nullToken
:
(
1
,
minCount
),
self
.
noStackToken
:
(
2
,
minCount
),
self
.
oobToken
:
(
3
,
minCount
),
self
.
noDepLeft
:
(
4
,
minCount
),
self
.
noDepRight
:
(
5
,
minCount
),
self
.
noGov
:
(
6
,
minCount
),
self
.
notSeen
:
(
7
,
minCount
)}
for
col
in
targetColumns
}
self
.
dicts
=
{
col
:
{
self
.
unkToken
:
(
0
,
minCount
),
self
.
nullToken
:
(
1
,
minCount
),
self
.
noStackToken
:
(
2
,
minCount
),
self
.
oobToken
:
(
3
,
minCount
),
self
.
noDepLeft
:
(
4
,
minCount
),
self
.
noDepRight
:
(
5
,
minCount
),
self
.
noGov
:
(
6
,
minCount
),
self
.
notSeen
:
(
7
,
minCount
)
,
self
.
erased
:
(
8
,
minCount
)
}
for
col
in
targetColumns
}
splited
=
line
.
split
(
'
\t
'
)
splited
=
line
.
split
(
'
\t
'
)
for
col
in
targetColumns
:
for
col
in
targetColumns
:
...
...
This diff is collapsed.
Click to expand it.
Features.py
+
5
−
1
View file @
272c4c4d
import
torch
import
torch
import
sys
import
sys
from
Util
import
isEmpty
from
Util
import
isEmpty
import
Dicts
################################################################################
################################################################################
# Input : b=buffer s=stack .0=governor .x=rightChild#x+1 .-x=leftChild#-x-1
# Input : b=buffer s=stack .0=governor .x=rightChild#x+1 .-x=leftChild#-x-1
...
@@ -12,6 +13,7 @@ from Util import isEmpty
...
@@ -12,6 +13,7 @@ from Util import isEmpty
# -4 : No dependent right
# -4 : No dependent right
# -5 : No gov
# -5 : No gov
# -6 : Not seen
# -6 : Not seen
# -7 : Erased
# If incremental is true, only words that have been 'seen' (at wordIndex) can be used
# If incremental is true, only words that have been 'seen' (at wordIndex) can be used
# others will be marked as not seen.
# others will be marked as not seen.
def
extractIndexes
(
config
,
featureFunction
,
incremental
)
:
def
extractIndexes
(
config
,
featureFunction
,
incremental
)
:
...
@@ -39,6 +41,8 @@ def extractIndexes(config, featureFunction, incremental) :
...
@@ -39,6 +41,8 @@ def extractIndexes(config, featureFunction, incremental) :
head
=
config
.
getAsFeature
(
index
,
"
HEAD
"
)
head
=
config
.
getAsFeature
(
index
,
"
HEAD
"
)
if
isEmpty
(
head
)
:
if
isEmpty
(
head
)
:
index
=
-
5
index
=
-
5
elif
head
==
Dicts
.
Dicts
.
erased
:
index
=
-
7
else
:
else
:
index
=
int
(
head
)
index
=
int
(
head
)
continue
continue
...
@@ -62,7 +66,7 @@ def extractIndexes(config, featureFunction, incremental) :
...
@@ -62,7 +66,7 @@ def extractIndexes(config, featureFunction, incremental) :
################################################################################
################################################################################
# For each element of the feature function and for each column, concatenante the dict index
# For each element of the feature function and for each column, concatenante the dict index
def
extractColsFeatures
(
dicts
,
config
,
featureFunction
,
cols
,
incremental
)
:
def
extractColsFeatures
(
dicts
,
config
,
featureFunction
,
cols
,
incremental
)
:
specialValues
=
{
-
1
:
dicts
.
oobToken
,
-
2
:
dicts
.
noStackToken
,
-
3
:
dicts
.
noDepLeft
,
-
4
:
dicts
.
noDepRight
,
-
5
:
dicts
.
noGov
,
-
6
:
dicts
.
notSeen
}
specialValues
=
{
-
1
:
dicts
.
oobToken
,
-
2
:
dicts
.
noStackToken
,
-
3
:
dicts
.
noDepLeft
,
-
4
:
dicts
.
noDepRight
,
-
5
:
dicts
.
noGov
,
-
6
:
dicts
.
notSeen
,
-
7
:
dicts
.
erased
}
indexes
=
extractIndexes
(
config
,
featureFunction
,
incremental
)
indexes
=
extractIndexes
(
config
,
featureFunction
,
incremental
)
totalSize
=
len
(
cols
)
*
len
(
indexes
)
totalSize
=
len
(
cols
)
*
len
(
indexes
)
...
...
This diff is collapsed.
Click to expand it.
Transition.py
+
17
−
16
View file @
272c4c4d
import
sys
import
sys
import
Config
import
Config
import
Dicts
from
Util
import
isEmpty
from
Util
import
isEmpty
################################################################################
################################################################################
...
@@ -53,31 +54,31 @@ class Transition :
...
@@ -53,31 +54,31 @@ class Transition :
def
appliable
(
self
,
config
)
:
def
appliable
(
self
,
config
)
:
if
self
.
name
==
"
RIGHT
"
:
if
self
.
name
==
"
RIGHT
"
:
for
colName
in
config
.
predicted
:
for
colName
in
config
.
predicted
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
(
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
or
config
.
getAsFeature
(
config
.
wordIndex
,
colName
)
==
Dicts
.
Dicts
.
erased
)
:
return
False
return
False
if
not
(
len
(
config
.
stack
)
>=
self
.
size
and
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
"
HEAD
"
))
and
not
linkCauseCycle
(
config
,
config
.
stack
[
-
self
.
size
],
config
.
wordIndex
))
:
if
not
(
len
(
config
.
stack
)
>=
self
.
size
and
(
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
"
HEAD
"
))
or
config
.
getAsFeature
(
config
.
wordIndex
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
and
not
linkCauseCycle
(
config
,
config
.
stack
[
-
self
.
size
],
config
.
wordIndex
))
:
return
False
return
False
orphansInStack
=
[
s
for
s
in
config
.
stack
[
-
self
.
size
+
1
:]
if
isEmpty
(
config
.
getAsFeature
(
s
,
"
HEAD
"
))]
if
self
.
size
>
1
else
[]
orphansInStack
=
[
s
for
s
in
config
.
stack
[
-
self
.
size
+
1
:]
if
isEmpty
(
config
.
getAsFeature
(
s
,
"
HEAD
"
))
or
config
.
getAsFeature
(
s
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
]
if
self
.
size
>
1
else
[]
return
len
(
orphansInStack
)
==
0
return
len
(
orphansInStack
)
==
0
if
self
.
name
==
"
LEFT
"
:
if
self
.
name
==
"
LEFT
"
:
for
colName
in
config
.
predicted
:
for
colName
in
config
.
predicted
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
(
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
or
config
.
getAsFeature
(
config
.
wordIndex
,
colName
)
==
Dicts
.
Dicts
.
erased
)
:
return
False
return
False
if
not
(
len
(
config
.
stack
)
>=
self
.
size
and
isEmpty
(
config
.
getAsFeature
(
config
.
stack
[
-
self
.
size
],
"
HEAD
"
))
and
not
linkCauseCycle
(
config
,
config
.
wordIndex
,
config
.
stack
[
-
self
.
size
]))
:
if
not
(
len
(
config
.
stack
)
>=
self
.
size
and
(
isEmpty
(
config
.
getAsFeature
(
config
.
stack
[
-
self
.
size
],
"
HEAD
"
))
or
config
.
getAsFeature
(
config
.
stack
[
-
self
.
size
],
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
and
not
linkCauseCycle
(
config
,
config
.
wordIndex
,
config
.
stack
[
-
self
.
size
]))
:
return
False
return
False
orphansInStack
=
[
s
for
s
in
config
.
stack
[
-
self
.
size
+
1
:]
if
isEmpty
(
config
.
getAsFeature
(
s
,
"
HEAD
"
))]
if
self
.
size
>
1
else
[]
orphansInStack
=
[
s
for
s
in
config
.
stack
[
-
self
.
size
+
1
:]
if
(
isEmpty
(
config
.
getAsFeature
(
s
,
"
HEAD
"
))
or
config
.
getAsFeature
(
s
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
]
if
self
.
size
>
1
else
[]
return
len
(
orphansInStack
)
==
0
return
len
(
orphansInStack
)
==
0
if
self
.
name
==
"
SHIFT
"
:
if
self
.
name
==
"
SHIFT
"
:
for
colName
in
config
.
predicted
:
for
colName
in
config
.
predicted
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
:
if
colName
not
in
[
"
HEAD
"
,
"
DEPREL
"
]
and
(
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
colName
))
or
config
.
getAsFeature
(
config
.
wordIndex
,
colName
)
==
Dicts
.
Dicts
.
erased
)
:
return
False
return
False
return
config
.
wordIndex
<
len
(
config
.
lines
)
-
1
return
config
.
wordIndex
<
len
(
config
.
lines
)
-
1
if
self
.
name
==
"
REDUCE
"
:
if
self
.
name
==
"
REDUCE
"
:
return
len
(
config
.
stack
)
>
0
and
not
isEmpty
(
config
.
getAsFeature
(
config
.
stack
[
-
1
],
"
HEAD
"
))
return
len
(
config
.
stack
)
>
0
and
not
(
isEmpty
(
config
.
getAsFeature
(
config
.
stack
[
-
1
],
"
HEAD
"
))
or
config
.
getAsFeature
(
config
.
stack
[
-
1
],
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
if
self
.
name
==
"
EOS
"
:
if
self
.
name
==
"
EOS
"
:
return
config
.
wordIndex
==
len
(
config
.
lines
)
-
1
return
config
.
wordIndex
==
len
(
config
.
lines
)
-
1
if
self
.
name
==
"
TAG
"
:
if
self
.
name
==
"
TAG
"
:
return
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
self
.
colName
))
return
isEmpty
(
config
.
getAsFeature
(
config
.
wordIndex
,
self
.
colName
))
or
config
.
getAsFeature
(
config
.
wordIndex
,
self
.
colName
)
==
Dicts
.
Dicts
.
erased
if
self
.
name
==
"
NOBACK
"
:
if
self
.
name
==
"
NOBACK
"
:
return
True
return
True
if
"
BACK
"
in
self
.
name
:
if
"
BACK
"
in
self
.
name
:
...
@@ -149,7 +150,7 @@ def nbLinksBufferStack(config) :
...
@@ -149,7 +150,7 @@ def nbLinksBufferStack(config) :
################################################################################
################################################################################
# Return True if link between from and to would cause a cycle
# Return True if link between from and to would cause a cycle
def
linkCauseCycle
(
config
,
fromIndex
,
toIndex
)
:
def
linkCauseCycle
(
config
,
fromIndex
,
toIndex
)
:
while
not
isEmpty
(
config
.
getAsFeature
(
fromIndex
,
"
HEAD
"
))
:
while
not
isEmpty
(
config
.
getAsFeature
(
fromIndex
,
"
HEAD
"
))
and
not
config
.
getAsFeature
(
fromIndex
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
:
fromIndex
=
int
(
config
.
getAsFeature
(
fromIndex
,
"
HEAD
"
))
fromIndex
=
int
(
config
.
getAsFeature
(
fromIndex
,
"
HEAD
"
))
if
fromIndex
==
toIndex
:
if
fromIndex
==
toIndex
:
return
True
return
True
...
@@ -208,7 +209,7 @@ def applyBackRight(config, data, size) :
...
@@ -208,7 +209,7 @@ def applyBackRight(config, data, size) :
config
.
stack
.
pop
()
config
.
stack
.
pop
()
while
len
(
data
)
>
0
:
while
len
(
data
)
>
0
:
config
.
stack
.
append
(
data
.
pop
())
config
.
stack
.
append
(
data
.
pop
())
config
.
set
(
config
.
wordIndex
,
"
HEAD
"
,
""
)
config
.
set
(
config
.
wordIndex
,
"
HEAD
"
,
Dicts
.
Dicts
.
erased
)
config
.
predChilds
[
config
.
stack
[
-
size
]].
pop
()
config
.
predChilds
[
config
.
stack
[
-
size
]].
pop
()
################################################################################
################################################################################
...
@@ -217,7 +218,7 @@ def applyBackLeft(config, data, size) :
...
@@ -217,7 +218,7 @@ def applyBackLeft(config, data, size) :
config
.
stack
.
append
(
data
.
pop
())
config
.
stack
.
append
(
data
.
pop
())
while
len
(
data
)
>
0
:
while
len
(
data
)
>
0
:
config
.
stack
.
append
(
data
.
pop
())
config
.
stack
.
append
(
data
.
pop
())
config
.
set
(
config
.
stack
[
-
size
],
"
HEAD
"
,
""
)
config
.
set
(
config
.
stack
[
-
size
],
"
HEAD
"
,
Dicts
.
Dicts
.
erased
)
config
.
predChilds
[
config
.
wordIndex
].
pop
()
config
.
predChilds
[
config
.
wordIndex
].
pop
()
################################################################################
################################################################################
...
@@ -233,7 +234,7 @@ def applyBackReduce(config, data) :
...
@@ -233,7 +234,7 @@ def applyBackReduce(config, data) :
################################################################################
################################################################################
def
applyBackTag
(
config
,
colName
)
:
def
applyBackTag
(
config
,
colName
)
:
config
.
set
(
config
.
wordIndex
,
colName
,
""
)
config
.
set
(
config
.
wordIndex
,
colName
,
Dicts
.
Dicts
.
erased
)
################################################################################
################################################################################
################################################################################
################################################################################
...
@@ -273,9 +274,9 @@ def applyEOS(config) :
...
@@ -273,9 +274,9 @@ def applyEOS(config) :
if
not
config
.
hasCol
(
"
HEAD
"
)
or
not
config
.
isPredicted
(
"
HEAD
"
)
:
if
not
config
.
hasCol
(
"
HEAD
"
)
or
not
config
.
isPredicted
(
"
HEAD
"
)
:
return
return
rootCandidates
=
[
index
for
index
in
config
.
stack
if
not
config
.
isMultiword
(
index
)
and
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))]
rootCandidates
=
[
index
for
index
in
config
.
stack
if
not
config
.
isMultiword
(
index
)
and
(
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))
or
config
.
getAsFeature
(
index
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
]
if
len
(
rootCandidates
)
==
0
:
if
len
(
rootCandidates
)
==
0
:
rootCandidates
=
[
index
for
index
in
range
(
len
(
config
.
lines
))
if
not
config
.
isMultiword
(
index
)
and
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))]
rootCandidates
=
[
index
for
index
in
range
(
len
(
config
.
lines
))
if
not
config
.
isMultiword
(
index
)
and
(
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))
or
config
.
getAsFeature
(
index
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
]
if
len
(
rootCandidates
)
==
0
:
if
len
(
rootCandidates
)
==
0
:
print
(
"
ERROR : no candidates for root
"
,
file
=
sys
.
stderr
)
print
(
"
ERROR : no candidates for root
"
,
file
=
sys
.
stderr
)
...
@@ -287,7 +288,7 @@ def applyEOS(config) :
...
@@ -287,7 +288,7 @@ def applyEOS(config) :
config
.
set
(
rootIndex
,
"
DEPREL
"
,
"
root
"
)
config
.
set
(
rootIndex
,
"
DEPREL
"
,
"
root
"
)
for
index
in
range
(
len
(
config
.
lines
))
:
for
index
in
range
(
len
(
config
.
lines
))
:
if
config
.
isMultiword
(
index
)
or
not
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))
:
if
config
.
isMultiword
(
index
)
or
not
(
isEmpty
(
config
.
getAsFeature
(
index
,
"
HEAD
"
))
or
config
.
getAsFeature
(
index
,
"
HEAD
"
)
==
Dicts
.
Dicts
.
erased
)
:
continue
continue
config
.
set
(
index
,
"
HEAD
"
,
str
(
rootIndex
))
config
.
set
(
index
,
"
HEAD
"
,
str
(
rootIndex
))
config
.
predChilds
[
rootIndex
].
append
(
index
)
config
.
predChilds
[
rootIndex
].
append
(
index
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment