Skip to content
Snippets Groups Projects
Commit d07836d6 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

new version of datcha2mcf.pl

parent 1c005dbf
Branches
No related tags found
No related merge requests found
#!/usr/bin/perl
# NOTE(review): $token_num is not referenced anywhere else in the visible script.
my $token_num = 1;

# Parts of speech: maps every input tag to the tag that replaces it.
our %tilt2datcha_pos = (
    # Tags that are renamed or merged into another tag.
    'ABR'        => 'NOM',
    'ANONYM'     => 'NAM',
    'NUM'        => 'DET',
    'DATE'       => 'NOM',
    'DET:ART'    => 'DET',
    'DET:ART:AM' => 'DET',
    'DET:IND'    => 'DET',
    'DET:POS'    => 'DET',
    'PRO:DEM'    => 'PRO',
    'PRO:IND'    => 'PRO',
    'PRO:PER'    => 'PRO',    # disabled alternative: 'PRO:PER' => 'PRO:PER'
    'PRO:POS'    => 'PRO',
    'PUN:cit'    => 'PUN',
    'SENT'       => 'PUN',
    'SYM'        => 'NOM',
    'URL'        => 'NOM',
    'HEURE'      => 'NOM',
    'XXX'        => 'NOM',

    # Tags that are kept unchanged (each one maps to itself).
    (
        map { ( $_ => $_ ) } qw(
            PRO:REL
            ADJ ADV ANG DET INT KON NAM NOM PRF PRO PRP PUN
            VER VER:impe VER:infi VER:pper VER:ppre
        )
    ),
);
# Simplified (coarse) parts of speech: maps every input tag to its coarse
# category.
our %tilt2datcha_cpos;
{
    # Coarse category => list of input tags that belong to it.
    my %members_of = (
        'NOM' => [qw(ABR DATE SYM URL HEURE XXX NOM)],
        'NAM' => [qw(ANONYM NAM)],
        'DET' => [qw(NUM DET:ART DET:ART:AM DET:IND DET:POS DET)],
        'PRO' => [qw(PRO:DEM PRO:IND PRO:PER PRO:POS PRO:REL PRO)],
        'PUN' => [qw(PUN:cit SENT PUN)],
        'VER' => [qw(VER VER:impe VER:infi VER:pper VER:ppre)],
        'ADJ' => [qw(ADJ)],
        'ADV' => [qw(ADV)],
        'ANG' => [qw(ANG)],
        'INT' => [qw(INT)],
        'KON' => [qw(KON)],
        'PRF' => [qw(PRF)],
        'PRP' => [qw(PRP)],
    );

    # Invert the grouping into the flat tag => category lookup table.
    for my $coarse ( keys %members_of ) {
        for my $tag ( @{ $members_of{$coarse} } ) {
            $tilt2datcha_cpos{$tag} = $coarse;
        }
    }
}
# Convert one tab-separated input line into its output line.
#
# The line is split into at most ten columns.  The fourth column (the
# part-of-speech tag) is translated through %tilt2datcha_pos; the other nine
# columns are copied unchanged.  Missing columns are emitted as empty strings
# and anything after the tenth column is dropped, so the returned string
# always holds exactly ten tab-separated fields followed by a newline.
#
# A non-empty tag that has no entry in %tilt2datcha_pos is kept as it is and
# reported on STDERR; previously such a tag was silently replaced by an
# empty column.
#
# NOTE(review): the previous loop body also ran s/ /_/g on $form and $lemma
# and looked up $tilt2datcha_cpos{$pos}, but none of those variables was ever
# assigned, so the statements had no effect on the output and were removed.
# If spaces in the form/lemma columns must become underscores, apply the
# substitution to the matching @cols entries -- confirm which columns they are.
sub _convert_line {
    my ($line) = @_;

    # Remove the line terminator only.  chop() would also eat the last real
    # character of a final line that has no trailing newline.
    chomp $line;

    # Example input line (tab-separated):
    # Bonjour Bonjour OK INT bonjour tchat1 TC [00:11:09] Bonjour IV
    my @cols = split /\t/, $line, 11;
    $#cols = 9;    # exactly ten columns: pad short lines, drop the overflow
    @cols = map { defined $_ ? $_ : '' } @cols;

    my $tag = $cols[3];
    if ( exists $tilt2datcha_pos{$tag} ) {
        $cols[3] = $tilt2datcha_pos{$tag};
    }
    elsif ( length $tag ) {
        my $where = defined $. ? " (input line $.)" : '';
        warn "unknown part-of-speech tag '$tag'$where; tag left unchanged\n";
    }

    return join( "\t", @cols ) . "\n";
}

# Read every line from the files named on the command line (or from STDIN)
# and print its converted form.
while ( my $line = <> ) {
    print _convert_line($line);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment