Skip to content
Snippets Groups Projects
Commit 9c65f413 authored by Alexis Nasr
Browse files

added conll_keep_most_frequent_morpho_tags.pl

parent 63783205
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/perl
# Keep only the most frequent morphological tags of a CoNLL file.
#
# Usage: conll_keep_most_frequent_morpho_tags.pl CONLL_FILE [THRESHOLD]
#
# The file is read twice. The first pass counts how often each value of the
# sixth tab-separated column (the morpho column) occurs. The second pass
# prints every token line to standard output, replacing that column with "_"
# when its count is strictly below THRESHOLD. Lines starting with "#" are
# not printed, empty lines are reproduced as empty lines, and the ninth and
# tenth output columns are always "_".
use strict;
use warnings;

my $conll     = shift;
my $threshold = shift;

# Three-argument open() with an undefined file name opens an anonymous
# temporary file, so a missing argument would otherwise yield empty output
# instead of an error.
die "Usage: $0 CONLL_FILE [THRESHOLD]\n" unless defined $conll;

# A missing threshold used to compare as 0, which filters nothing; keep that
# behaviour but make it explicit.
$threshold = 0 unless defined $threshold;

# First pass: count the occurrences of every morpho value.
my %count_of_morpho;
open(my $count_fh, "<", $conll)
    or die "Can't open < $conll: $!";
while (my $line = <$count_fh>) {
    next if $line =~ /^\n/;    # sentence separators carry no tag
    next if $line =~ /^#/;     # comment lines carry no tag
    my $morpho = (token_columns($line))[5];
    $count_of_morpho{$morpho}++;
}
close($count_fh);

# Second pass: rewrite the file, masking the rare morpho values.
open(my $filter_fh, "<", $conll)
    or die "Can't open < $conll: $!";
while (my $line = <$filter_fh>) {
    if ($line =~ /^\n/) {
        print "\n";
        next;
    }
    next if $line =~ /^#/;

    my @columns = token_columns($line);
    my $seen = $count_of_morpho{ $columns[5] } || 0;
    $columns[5] = "_" if $seen < $threshold;
    print join("\t", @columns, "_", "_"), "\n";
}
close($filter_fh);

# Return the first eight tab-separated columns of a token line (index, form,
# lemma, cpos, pos, morpho, gov, label). The line terminator is removed with
# chomp, so both passes see the same values and a final line without a
# newline keeps its last character. Missing columns come back as empty
# strings.
sub token_columns {
    my ($line) = @_;
    chomp $line;
    my @columns = split /\t/, $line;
    push @columns, '' while @columns < 8;
    return @columns[0 .. 7];
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment