From 91d10249fbf52bbd55750d1567c73cfcbc8c18ef Mon Sep 17 00:00:00 2001
From: BLADIER Tatiana <tatiana.bladier@lis-lab.fr>
Date: Fri, 16 May 2025 10:35:29 +0200
Subject: [PATCH] add readfile option

---
 .../tania-some-other-metrics-checkpoint.ipynb | 241 ++++++++++++++----
 tania_scripts/tania-some-other-metrics.ipynb  | 241 ++++++++++++++----
 2 files changed, 376 insertions(+), 106 deletions(-)

diff --git a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb
index a918ace..0197b08 100644
--- a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb
+++ b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb
@@ -1015,7 +1015,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 43,
    "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a",
    "metadata": {},
    "outputs": [
@@ -1023,12 +1023,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--\tponct\t<nul>@@<nul>\t0\n",
-      "Le\tdet\tNP@@<nul>\t0\n",
-      "parlement\tnc\t<nul>@@<nul>\t1\n",
-      "...\tponct\t<nul>@@SENT\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                 SENT             \n",
       "                  |                \n",
       "                 SENT             \n",
@@ -1047,21 +1041,6 @@
       "    (ponct ...)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t0\n",
-      "Vous\tpro\tVN@@<nul>\t0\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "dit\tv\t<nul>@@<nul>\t1\n",
-      "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "je\tcls-suj\tVN@@<nul>\t1\n",
-      "ne\tadv\t<nul>@@<nul>\t2\n",
-      "me\tv\t<nul>@@<nul>\t2\n",
-      "reconnais\tv\t<nul>@@<nul>\t2\n",
-      "pas\tadv\t<nul>@@<nul>\t2\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                                  SENT                                           \n",
       "                                   |                                              \n",
       "                                  <s>                                            \n",
@@ -1086,14 +1065,6 @@
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t1\n",
-      "M.\tnpp\tNP-SUJ@@<nul>\t1\n",
-      "d'\tp\tNPP+@@<nul>\t2\n",
-      "Artagnan\tnpp\t<nul>@@<nul>\t3\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "               SENT                          \n",
       "                |                             \n",
       "               <s>                           \n",
@@ -1117,11 +1088,6 @@
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 6\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "Acté\tnc\tNP-OBJ@@<nul>\t1\n",
-      "?\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "         SENT            \n",
       "          |               \n",
       "         <s>             \n",
@@ -1137,16 +1103,6 @@
       "NLTK TREE (SENT\n",
       "  (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t1\n",
-      "Oui\tadj\tAP-ATS@@<nul>\t1\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "répondit\tv\tVN@@<nul>\t1\n",
-      "le\tdet\tNP-OBJ@@Sint-MOD\t2\n",
-      "mousquetaire\tnc\t<nul>@@<nul>\t3\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                         SENT                                          \n",
       "                          |                                             \n",
       "                         <s>                                           \n",
@@ -1173,7 +1129,144 @@
       "        (NP-OBJ (det le) (nc mousquetaire))))\n",
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
-      "Tree depth: 6\n"
+      "Tree depth: 6\n",
+      "                                                    SENT                                                              \n",
+      "                                                     |                                                                 \n",
+      "                                                    <s>                                                               \n",
+      "                                             ________|_____________________________________________________________    \n",
+      "                                         Sint-MOD                                                             |    |  \n",
+      "             _______________________________|_____________________________________                            |    |   \n",
+      "            |                   |           |              |                   Ssub-OBJ                       |    |  \n",
+      "            |                   |           |              |         _____________|________                   |    |   \n",
+      "           <s>                  |           |              |        |                     Sint                |    |  \n",
+      "  __________|__________         |           |              |        |              ________|_____________     |    |   \n",
+      " |    |     |     |    VN     NP-OBJ        |              VN       |             VN                     |    |   </s>\n",
+      " |    |     |     |    |    ____|_____      |         _____|___     |      _______|________________      |    |    |   \n",
+      " |  ponct  adv  ponct  v  det         nc  ponct   cls-suj      v    cs cls-suj   adv    clo-obj    v    adv ponct </s>\n",
+      " |    |     |     |    |   |          |     |        |         |    |     |       |        |       |     |    |    |   \n",
+      "<s>   --  Alors   ,   dit  le        roi    ,        il       faut que   vous     ne       le   rendiez pas   .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD\n",
+      "      (<s> <s> (ponct --) (adv Alors) (ponct ,) (VN (v dit)))\n",
+      "      (NP-OBJ (det le) (nc roi))\n",
+      "      (ponct ,)\n",
+      "      (VN (cls-suj il) (v faut))\n",
+      "      (Ssub-OBJ\n",
+      "        (cs que)\n",
+      "        (Sint\n",
+      "          (VN (cls-suj vous) (adv ne) (clo-obj le) (v rendiez))\n",
+      "          (adv pas))))\n",
+      "    (ponct .)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 7\n",
+      "                                                          SENT                                                          \n",
+      "                                                           |                                                             \n",
+      "                                                          <s>                                                           \n",
+      "                                                    _______|_________________________________________________________    \n",
+      "                                                  <s>                                                           |    |  \n",
+      "  _________________________________________________|____________________                                        |    |   \n",
+      " |    |     |      |                                                    VN                                      |    |  \n",
+      " |    |     |      |       _____________________________________________|_________________                      |    |   \n",
+      " |    |     |      |      |     |    |      |      |              |           |         COORD                   |    |  \n",
+      " |    |     |      |      |     |    |      |      |              |           |     ______|_____                |    |   \n",
+      " |    |   AP-ATS   |      |     |  AP-ATS   |      |            NP-OBJ        |    |            VN              |   </s>\n",
+      " |    |     |      |      |     |    |      |      |        ______|_____      |    |       _____|________       |    |   \n",
+      " |  ponct  adv   ponct cls-suj  v   adj   ponct cls-suj clo-obj         v   ponct  cc  cls-suj clr  v   vpp   ponct </s>\n",
+      " |    |     |      |      |     |    |      |      |       |            |     |    |      |     |   |    |      |    |   \n",
+      "<s>   --   Non     ,      il   est  vrai    ,      je      le          suis   ,   mais    il    m'  a  semblé  ...  </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (<s>\n",
+      "      <s>\n",
+      "      (ponct --)\n",
+      "      (AP-ATS (adv Non))\n",
+      "      (ponct ,)\n",
+      "      (VN\n",
+      "        (cls-suj il)\n",
+      "        (v est)\n",
+      "        (AP-ATS (adj vrai))\n",
+      "        (ponct ,)\n",
+      "        (cls-suj je)\n",
+      "        (NP-OBJ (clo-obj le) (v suis))\n",
+      "        (ponct ,)\n",
+      "        (COORD\n",
+      "          (cc mais)\n",
+      "          (VN (cls-suj il) (clr m') (v a) (vpp semblé)))))\n",
+      "    (ponct ...)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 7\n",
+      "     SENT     \n",
+      "      |        \n",
+      "     <s>      \n",
+      "      |        \n",
+      "     <s>      \n",
+      "  ____|____    \n",
+      " |    |   </s>\n",
+      " |    |    |   \n",
+      " |  ponct </s>\n",
+      " |    |    |   \n",
+      "<s>   --  </s>\n",
+      "\n",
+      "NLTK TREE (SENT (<s> (<s> <s> (ponct --) (</s> (</s> </s>)))))\n",
+      "Tree depth: 5\n",
+      "                                  SENT                                 \n",
+      "                                   |                                    \n",
+      "                                  <s>                                  \n",
+      "                ___________________|________________________________    \n",
+      "            Sint-MOD               |         Sint-MOD          |    |  \n",
+      "  _____________|______             |      ______|_______       |    |   \n",
+      " |                    VN           |     VN           NP-OBJ   |   </s>\n",
+      " |      ______________|_____       |     |              |      |    |   \n",
+      "<s> cls-suj           v    vpp   ponct  vinf           adv   ponct </s>\n",
+      " |     |              |     |      |     |              |      |    |   \n",
+      "<s>    Il             a  répondu   :   voilà           tout    .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD (<s> <s>) (VN (cls-suj Il) (v a) (vpp répondu)))\n",
+      "    (ponct :)\n",
+      "    (Sint-MOD (VN (vinf voilà)) (NP-OBJ (adv tout)))\n",
+      "    (ponct .)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 5\n",
+      "                                      SENT                                                    \n",
+      "                                       |                                                       \n",
+      "                                      <s>                                                     \n",
+      "                                       |                                                       \n",
+      "                                    Sint-MOD                                                  \n",
+      "  _____________________________________|____________________                                   \n",
+      " |           |              |                             COORD                               \n",
+      " |           |              |     __________________________|____________________              \n",
+      " |           |              |    |            |       |     |                    PP           \n",
+      " |           |              |    |            |       |     |      ______________|____         \n",
+      " |           |              |    |            |       |     |     |                 VPinf     \n",
+      " |           |              |    |            |       |     |     |       ____________|____    \n",
+      " |           VN             |    |            VN      |   AP-ATS  |      VN           |   </s>\n",
+      " |      _____|________      |    |      ______|___    |     |     |      |            |    |   \n",
+      "<s> cls-suj adv adv   v   ponct  cc cls-suj       v  adv   adj    p     vinf        ponct </s>\n",
+      " |     |     |   |    |     |    |     |          |   |     |     |      |            |    |   \n",
+      "<s>    Je    ne puis dire   ,   car    il        est bien facile  à  comprendre       .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD\n",
+      "      (<s> <s>)\n",
+      "      (VN (cls-suj Je) (adv ne) (adv puis) (v dire))\n",
+      "      (ponct ,)\n",
+      "      (COORD\n",
+      "        (cc car)\n",
+      "        (VN (cls-suj il) (v est))\n",
+      "        (adv bien)\n",
+      "        (AP-ATS (adj facile))\n",
+      "        (PP\n",
+      "          (p à)\n",
+      "          (VPinf (VN (vinf comprendre)) (ponct .) (</s> (</s> </s>))))))))\n",
+      "Tree depth: 8\n",
+      "-- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> \n",
+      "0\n"
      ]
     }
    ],
@@ -1182,15 +1275,21 @@
     "\n",
     "sents = readFile(file_conll)\n",
     "exepts = []\n",
-    "for sent in sents[:5]:\n",
+    "file_text = ''\n",
+    "for sent in sents[:10]:\n",
     "    sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n",
-    "    sent_str = ''.join(sent_lst)\n",
-    "    print(sent_str)\n",
+    "    triple_str = ''.join(sent_lst)\n",
+    "    sent_text = ' '.join([word[0] for word in sent]) + ' '\n",
+    "    file_text += sent_text\n",
     "    try:\n",
-    "        visualize_const_prediction(sent_str)\n",
+    "        visualize_const_prediction(triple_str)\n",
     "    except:\n",
-    "        exepts.append(sent_str)\n",
-    "print(len(exepts))"
+    "        exepts.append(triple_str)\n",
+    "print(file_text)\n",
+    "print(\"Number of sentences without a tree: \", len(exepts))\n",
+    "\n",
+    "for exept in exepts:\n",
+    "    print(exept)"
    ]
   },
   {
@@ -1199,7 +1298,43 @@
    "id": "729d7671-02be-443b-94ee-ddc79284aac4",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "def prepare_file_to_evaluate(file_conll):\n",
+    "    sents = readFile(file_conll)\n",
+    "    file_text = ''\n",
+    "    triple_strings = []\n",
+    "    for sent in sents[:10]:\n",
+    "        sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n",
+    "        triple_str = ''.join(sent_lst)\n",
+    "        sent_text = ' '.join([word[0] for word in sent]) + ' '\n",
+    "        file_text += sent_text\n",
+    "        triple_strings.append(triple_str)\n",
+    "    return file_text, triple_strings\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cefe6712-ab53-4589-9fb8-8d4c3fb00378",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_text, triple_strings = prepare_file_to_evaluate(file_conll)\n",
+    "\n",
+    "exepts = [] \n",
+    "\n",
+    "for triple_str in triple_strings:\n",
+    "    try:\n",
+    "        visualize_const_prediction(triple_str)\n",
+    "    except:\n",
+    "        exepts.append(triple_str)\n",
+    "print(\"Number of sentences without a tree: \", len(exepts))\n",
+    "    \n",
+    "for exept in exepts:\n",
+    "    print(exept)\n",
+    "    \n"
+   ]
   }
  ],
  "metadata": {
diff --git a/tania_scripts/tania-some-other-metrics.ipynb b/tania_scripts/tania-some-other-metrics.ipynb
index a918ace..0197b08 100644
--- a/tania_scripts/tania-some-other-metrics.ipynb
+++ b/tania_scripts/tania-some-other-metrics.ipynb
@@ -1015,7 +1015,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 43,
    "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a",
    "metadata": {},
    "outputs": [
@@ -1023,12 +1023,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--\tponct\t<nul>@@<nul>\t0\n",
-      "Le\tdet\tNP@@<nul>\t0\n",
-      "parlement\tnc\t<nul>@@<nul>\t1\n",
-      "...\tponct\t<nul>@@SENT\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                 SENT             \n",
       "                  |                \n",
       "                 SENT             \n",
@@ -1047,21 +1041,6 @@
       "    (ponct ...)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t0\n",
-      "Vous\tpro\tVN@@<nul>\t0\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "dit\tv\t<nul>@@<nul>\t1\n",
-      "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "je\tcls-suj\tVN@@<nul>\t1\n",
-      "ne\tadv\t<nul>@@<nul>\t2\n",
-      "me\tv\t<nul>@@<nul>\t2\n",
-      "reconnais\tv\t<nul>@@<nul>\t2\n",
-      "pas\tadv\t<nul>@@<nul>\t2\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                                  SENT                                           \n",
       "                                   |                                              \n",
       "                                  <s>                                            \n",
@@ -1086,14 +1065,6 @@
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t1\n",
-      "M.\tnpp\tNP-SUJ@@<nul>\t1\n",
-      "d'\tp\tNPP+@@<nul>\t2\n",
-      "Artagnan\tnpp\t<nul>@@<nul>\t3\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "               SENT                          \n",
       "                |                             \n",
       "               <s>                           \n",
@@ -1117,11 +1088,6 @@
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
       "Tree depth: 6\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "Acté\tnc\tNP-OBJ@@<nul>\t1\n",
-      "?\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "         SENT            \n",
       "          |               \n",
       "         <s>             \n",
@@ -1137,16 +1103,6 @@
       "NLTK TREE (SENT\n",
       "  (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n",
       "Tree depth: 5\n",
-      "<s>\t<s>\t<s>\t1\n",
-      "--\tponct\t<nul>@@<nul>\t1\n",
-      "Oui\tadj\tAP-ATS@@<nul>\t1\n",
-      ",\tponct\t<nul>@@<nul>\t1\n",
-      "répondit\tv\tVN@@<nul>\t1\n",
-      "le\tdet\tNP-OBJ@@Sint-MOD\t2\n",
-      "mousquetaire\tnc\t<nul>@@<nul>\t3\n",
-      ".\tponct\t<nul>@@<nul>\t0\n",
-      "</s>\t</s>\t</s>\t0\n",
-      "\n",
       "                         SENT                                          \n",
       "                          |                                             \n",
       "                         <s>                                           \n",
@@ -1173,7 +1129,144 @@
       "        (NP-OBJ (det le) (nc mousquetaire))))\n",
       "    (ponct .)\n",
       "    (</s> (</s> </s>))))\n",
-      "Tree depth: 6\n"
+      "Tree depth: 6\n",
+      "                                                    SENT                                                              \n",
+      "                                                     |                                                                 \n",
+      "                                                    <s>                                                               \n",
+      "                                             ________|_____________________________________________________________    \n",
+      "                                         Sint-MOD                                                             |    |  \n",
+      "             _______________________________|_____________________________________                            |    |   \n",
+      "            |                   |           |              |                   Ssub-OBJ                       |    |  \n",
+      "            |                   |           |              |         _____________|________                   |    |   \n",
+      "           <s>                  |           |              |        |                     Sint                |    |  \n",
+      "  __________|__________         |           |              |        |              ________|_____________     |    |   \n",
+      " |    |     |     |    VN     NP-OBJ        |              VN       |             VN                     |    |   </s>\n",
+      " |    |     |     |    |    ____|_____      |         _____|___     |      _______|________________      |    |    |   \n",
+      " |  ponct  adv  ponct  v  det         nc  ponct   cls-suj      v    cs cls-suj   adv    clo-obj    v    adv ponct </s>\n",
+      " |    |     |     |    |   |          |     |        |         |    |     |       |        |       |     |    |    |   \n",
+      "<s>   --  Alors   ,   dit  le        roi    ,        il       faut que   vous     ne       le   rendiez pas   .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD\n",
+      "      (<s> <s> (ponct --) (adv Alors) (ponct ,) (VN (v dit)))\n",
+      "      (NP-OBJ (det le) (nc roi))\n",
+      "      (ponct ,)\n",
+      "      (VN (cls-suj il) (v faut))\n",
+      "      (Ssub-OBJ\n",
+      "        (cs que)\n",
+      "        (Sint\n",
+      "          (VN (cls-suj vous) (adv ne) (clo-obj le) (v rendiez))\n",
+      "          (adv pas))))\n",
+      "    (ponct .)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 7\n",
+      "                                                          SENT                                                          \n",
+      "                                                           |                                                             \n",
+      "                                                          <s>                                                           \n",
+      "                                                    _______|_________________________________________________________    \n",
+      "                                                  <s>                                                           |    |  \n",
+      "  _________________________________________________|____________________                                        |    |   \n",
+      " |    |     |      |                                                    VN                                      |    |  \n",
+      " |    |     |      |       _____________________________________________|_________________                      |    |   \n",
+      " |    |     |      |      |     |    |      |      |              |           |         COORD                   |    |  \n",
+      " |    |     |      |      |     |    |      |      |              |           |     ______|_____                |    |   \n",
+      " |    |   AP-ATS   |      |     |  AP-ATS   |      |            NP-OBJ        |    |            VN              |   </s>\n",
+      " |    |     |      |      |     |    |      |      |        ______|_____      |    |       _____|________       |    |   \n",
+      " |  ponct  adv   ponct cls-suj  v   adj   ponct cls-suj clo-obj         v   ponct  cc  cls-suj clr  v   vpp   ponct </s>\n",
+      " |    |     |      |      |     |    |      |      |       |            |     |    |      |     |   |    |      |    |   \n",
+      "<s>   --   Non     ,      il   est  vrai    ,      je      le          suis   ,   mais    il    m'  a  semblé  ...  </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (<s>\n",
+      "      <s>\n",
+      "      (ponct --)\n",
+      "      (AP-ATS (adv Non))\n",
+      "      (ponct ,)\n",
+      "      (VN\n",
+      "        (cls-suj il)\n",
+      "        (v est)\n",
+      "        (AP-ATS (adj vrai))\n",
+      "        (ponct ,)\n",
+      "        (cls-suj je)\n",
+      "        (NP-OBJ (clo-obj le) (v suis))\n",
+      "        (ponct ,)\n",
+      "        (COORD\n",
+      "          (cc mais)\n",
+      "          (VN (cls-suj il) (clr m') (v a) (vpp semblé)))))\n",
+      "    (ponct ...)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 7\n",
+      "     SENT     \n",
+      "      |        \n",
+      "     <s>      \n",
+      "      |        \n",
+      "     <s>      \n",
+      "  ____|____    \n",
+      " |    |   </s>\n",
+      " |    |    |   \n",
+      " |  ponct </s>\n",
+      " |    |    |   \n",
+      "<s>   --  </s>\n",
+      "\n",
+      "NLTK TREE (SENT (<s> (<s> <s> (ponct --) (</s> (</s> </s>)))))\n",
+      "Tree depth: 5\n",
+      "                                  SENT                                 \n",
+      "                                   |                                    \n",
+      "                                  <s>                                  \n",
+      "                ___________________|________________________________    \n",
+      "            Sint-MOD               |         Sint-MOD          |    |  \n",
+      "  _____________|______             |      ______|_______       |    |   \n",
+      " |                    VN           |     VN           NP-OBJ   |   </s>\n",
+      " |      ______________|_____       |     |              |      |    |   \n",
+      "<s> cls-suj           v    vpp   ponct  vinf           adv   ponct </s>\n",
+      " |     |              |     |      |     |              |      |    |   \n",
+      "<s>    Il             a  répondu   :   voilà           tout    .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD (<s> <s>) (VN (cls-suj Il) (v a) (vpp répondu)))\n",
+      "    (ponct :)\n",
+      "    (Sint-MOD (VN (vinf voilà)) (NP-OBJ (adv tout)))\n",
+      "    (ponct .)\n",
+      "    (</s> (</s> </s>))))\n",
+      "Tree depth: 5\n",
+      "                                      SENT                                                    \n",
+      "                                       |                                                       \n",
+      "                                      <s>                                                     \n",
+      "                                       |                                                       \n",
+      "                                    Sint-MOD                                                  \n",
+      "  _____________________________________|____________________                                   \n",
+      " |           |              |                             COORD                               \n",
+      " |           |              |     __________________________|____________________              \n",
+      " |           |              |    |            |       |     |                    PP           \n",
+      " |           |              |    |            |       |     |      ______________|____         \n",
+      " |           |              |    |            |       |     |     |                 VPinf     \n",
+      " |           |              |    |            |       |     |     |       ____________|____    \n",
+      " |           VN             |    |            VN      |   AP-ATS  |      VN           |   </s>\n",
+      " |      _____|________      |    |      ______|___    |     |     |      |            |    |   \n",
+      "<s> cls-suj adv adv   v   ponct  cc cls-suj       v  adv   adj    p     vinf        ponct </s>\n",
+      " |     |     |   |    |     |    |     |          |   |     |     |      |            |    |   \n",
+      "<s>    Je    ne puis dire   ,   car    il        est bien facile  à  comprendre       .   </s>\n",
+      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (Sint-MOD\n",
+      "      (<s> <s>)\n",
+      "      (VN (cls-suj Je) (adv ne) (adv puis) (v dire))\n",
+      "      (ponct ,)\n",
+      "      (COORD\n",
+      "        (cc car)\n",
+      "        (VN (cls-suj il) (v est))\n",
+      "        (adv bien)\n",
+      "        (AP-ATS (adj facile))\n",
+      "        (PP\n",
+      "          (p à)\n",
+      "          (VPinf (VN (vinf comprendre)) (ponct .) (</s> (</s> </s>))))))))\n",
+      "Tree depth: 8\n",
+      "-- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> \n",
+      "0\n"
      ]
     }
    ],
@@ -1182,15 +1275,21 @@
     "\n",
     "sents = readFile(file_conll)\n",
     "exepts = []\n",
-    "for sent in sents[:5]:\n",
+    "file_text = ''\n",
+    "for sent in sents[:10]:\n",
     "    sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n",
-    "    sent_str = ''.join(sent_lst)\n",
-    "    print(sent_str)\n",
+    "    triple_str = ''.join(sent_lst)\n",
+    "    sent_text = ' '.join([word[0] for word in sent]) + ' '\n",
+    "    file_text += sent_text\n",
     "    try:\n",
-    "        visualize_const_prediction(sent_str)\n",
+    "        visualize_const_prediction(triple_str)\n",
     "    except:\n",
-    "        exepts.append(sent_str)\n",
-    "print(len(exepts))"
+    "        exepts.append(triple_str)\n",
+    "print(file_text)\n",
+    "print(\"Number of sentences without a tree: \", len(exepts))\n",
+    "\n",
+    "for exept in exepts:\n",
+    "    print(exept)"
    ]
   },
   {
@@ -1199,7 +1298,43 @@
    "id": "729d7671-02be-443b-94ee-ddc79284aac4",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "def prepare_file_to_evaluate(file_conll):\n",
+    "    sents = readFile(file_conll)\n",
+    "    file_text = ''\n",
+    "    triple_strings = []\n",
+    "    for sent in sents[:10]:\n",
+    "        sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n",
+    "        triple_str = ''.join(sent_lst)\n",
+    "        sent_text = ' '.join([word[0] for word in sent]) + ' '\n",
+    "        file_text += sent_text\n",
+    "        triple_strings.append(triple_str)\n",
+    "    return file_text, triple_strings\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cefe6712-ab53-4589-9fb8-8d4c3fb00378",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_text, triple_strings = prepare_file_to_evaluate(file_conll)\n",
+    "\n",
+    "exepts = [] \n",
+    "\n",
+    "for triple_str in triple_strings:\n",
+    "    try:\n",
+    "        visualize_const_prediction(triple_str)\n",
+    "    except:\n",
+    "        exepts.append(triple_str)\n",
+    "print(\"Number of sentences without a tree: \", len(exepts))\n",
+    "    \n",
+    "for exept in exepts:\n",
+    "    print(exept)\n",
+    "    \n"
+   ]
   }
  ],
  "metadata": {
-- 
GitLab