From d07836d6ad00ec1f7cfd8c908e9b51f71ae4f107 Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Thu, 25 May 2017 10:23:47 +0200
Subject: [PATCH] new version of datcha2mcf.pl

---
 datcha/tools/datcha2mcf.pl | 99 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100755 datcha/tools/datcha2mcf.pl

diff --git a/datcha/tools/datcha2mcf.pl b/datcha/tools/datcha2mcf.pl
new file mode 100755
index 0000000..07c0ded
--- /dev/null
+++ b/datcha/tools/datcha2mcf.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+# Filter: prints each tab-separated input line with its POS tag (column 4) remapped.
+use strict;
+use warnings;
+
+# Parts of speech: input tag => output tag (an unknown tag gives an empty column).
+my %tilt2datcha_pos = (
+    'ABR'        => 'NOM',
+    'ANONYM'     => 'NAM',
+    'NUM'        => 'DET',
+    'DATE'       => 'NOM',
+    'DET:ART'    => 'DET',
+    'DET:ART:AM' => 'DET',
+    'DET:IND'    => 'DET',
+    'DET:POS'    => 'DET',
+    'PRO:DEM'    => 'PRO',
+    'PRO:IND'    => 'PRO',
+    # 'PRO:PER'  => 'PRO:PER',    # disabled alternative: keep the fine-grained tag
+    'PRO:PER'    => 'PRO',
+    'PRO:POS'    => 'PRO',
+    'PRO:REL'    => 'PRO:REL',
+    'PUN:cit'    => 'PUN',
+    'SENT'       => 'PUN',
+    'SYM'        => 'NOM',
+    'URL'        => 'NOM',
+    'HEURE'      => 'NOM',
+    'XXX'        => 'NOM',
+    # Tags kept unchanged.
+    'ADJ'        => 'ADJ',
+    'ADV'        => 'ADV',
+    'ANG'        => 'ANG',
+    'DET'        => 'DET',
+    'INT'        => 'INT',
+    'KON'        => 'KON',
+    'NAM'        => 'NAM',
+    'NOM'        => 'NOM',
+    'PRF'        => 'PRF',
+    'PRO'        => 'PRO',
+    'PRP'        => 'PRP',
+    'PUN'        => 'PUN',
+    'VER'        => 'VER',
+    'VER:impe'   => 'VER:impe',
+    'VER:infi'   => 'VER:infi',
+    'VER:pper'   => 'VER:pper',
+    'VER:ppre'   => 'VER:ppre',
+);
+
+# Simplified parts of speech. NOTE(review): not used by the loop below; kept for reference.
+my %tilt2datcha_cpos = (
+    'ABR'        => 'NOM',
+    'ANONYM'     => 'NAM',
+    'NUM'        => 'DET',
+    'DATE'       => 'NOM',
+    'DET:ART'    => 'DET',
+    'DET:ART:AM' => 'DET',
+    'DET:IND'    => 'DET',
+    'DET:POS'    => 'DET',
+    'PRO:DEM'    => 'PRO',
+    'PRO:IND'    => 'PRO',
+    'PRO:PER'    => 'PRO',
+    'PRO:POS'    => 'PRO',
+    'PRO:REL'    => 'PRO',
+    'PUN:cit'    => 'PUN',
+    'SENT'       => 'PUN',
+    'SYM'        => 'NOM',
+    'URL'        => 'NOM',
+    'HEURE'      => 'NOM',
+    'XXX'        => 'NOM',
+    # Base tags kept unchanged; verb subtypes collapse to VER.
+    'ADJ'        => 'ADJ',
+    'ADV'        => 'ADV',
+    'ANG'        => 'ANG',
+    'DET'        => 'DET',
+    'INT'        => 'INT',
+    'KON'        => 'KON',
+    'NAM'        => 'NAM',
+    'NOM'        => 'NOM',
+    'PRF'        => 'PRF',
+    'PRO'        => 'PRO',
+    'PRP'        => 'PRP',
+    'PUN'        => 'PUN',
+    'VER'        => 'VER',
+    'VER:impe'   => 'VER',
+    'VER:infi'   => 'VER',
+    'VER:pper'   => 'VER',
+    'VER:ppre'   => 'VER',
+);
+
+while (my $line = <>) {
+    chomp $line;    # chomp, not chop: a last line without newline must keep its final character
+    # Sample input: Bonjour	Bonjour	OK	INT	bonjour	tchat1	TC	[00:11:09]	Bonjour	IV
+    my @fields = split /\t/, $line;
+    $#fields = 9;    # always ten columns: extra fields are dropped, missing ones become empty
+    @fields = map { defined $_ ? $_ : '' } @fields;
+    my $tag = $fields[3];
+    warn "unknown POS tag '$tag' at input line $.\n" if $tag ne '' && !exists $tilt2datcha_pos{$tag};
+    $fields[3] = exists $tilt2datcha_pos{$tag} ? $tilt2datcha_pos{$tag} : '';
+    print join("\t", @fields), "\n";
+}
-- 
GitLab