From 91d10249fbf52bbd55750d1567c73cfcbc8c18ef Mon Sep 17 00:00:00 2001 From: BLADIER Tatiana <tatiana.bladier@lis-lab.fr> Date: Fri, 16 May 2025 10:35:29 +0200 Subject: [PATCH] add readfile option --- .../tania-some-other-metrics-checkpoint.ipynb | 241 ++++++++++++++---- tania_scripts/tania-some-other-metrics.ipynb | 241 ++++++++++++++---- 2 files changed, 376 insertions(+), 106 deletions(-) diff --git a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb index a918ace..0197b08 100644 --- a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb +++ b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb @@ -1015,7 +1015,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 43, "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a", "metadata": {}, "outputs": [ @@ -1023,12 +1023,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "--\tponct\t<nul>@@<nul>\t0\n", - "Le\tdet\tNP@@<nul>\t0\n", - "parlement\tnc\t<nul>@@<nul>\t1\n", - "...\tponct\t<nul>@@SENT\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " SENT \n", @@ -1047,21 +1041,6 @@ " (ponct ...)\n", " (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t0\n", - "Vous\tpro\tVN@@<nul>\t0\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "dit\tv\t<nul>@@<nul>\t1\n", - "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "je\tcls-suj\tVN@@<nul>\t1\n", - "ne\tadv\t<nul>@@<nul>\t2\n", - "me\tv\t<nul>@@<nul>\t2\n", - "reconnais\tv\t<nul>@@<nul>\t2\n", - "pas\tadv\t<nul>@@<nul>\t2\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1086,14 +1065,6 @@ " (ponct .)\n", " (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t1\n", - "M.\tnpp\tNP-SUJ@@<nul>\t1\n", - "d'\tp\tNPP+@@<nul>\t2\n", - "Artagnan\tnpp\t<nul>@@<nul>\t3\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1117,11 +1088,6 @@ " (ponct .)\n", " (</s> (</s> </s>))))\n", "Tree depth: 6\n", - "<s>\t<s>\t<s>\t1\n", - "Acté\tnc\tNP-OBJ@@<nul>\t1\n", - "?\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1137,16 +1103,6 @@ "NLTK TREE (SENT\n", " (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t1\n", - "Oui\tadj\tAP-ATS@@<nul>\t1\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "répondit\tv\tVN@@<nul>\t1\n", - "le\tdet\tNP-OBJ@@Sint-MOD\t2\n", - "mousquetaire\tnc\t<nul>@@<nul>\t3\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1173,7 +1129,144 @@ " (NP-OBJ (det le) (nc mousquetaire))))\n", " (ponct .)\n", " (</s> (</s> </s>))))\n", - "Tree depth: 6\n" + "Tree depth: 6\n", + " SENT \n", + " | \n", + " <s> \n", + " ________|_____________________________________________________________ \n", + " Sint-MOD | | \n", + " _______________________________|_____________________________________ | | \n", + " | | | | Ssub-OBJ | | \n", + " | | | | _____________|________ | | \n", + " <s> | | | | Sint | | \n", + " __________|__________ | | | | ________|_____________ | | \n", + " | | | | VN NP-OBJ | VN | VN | | </s>\n", + " | | | | | ____|_____ | _____|___ | _______|________________ | | | \n", + " | ponct adv ponct v det nc ponct cls-suj v cs cls-suj adv clo-obj v adv ponct </s>\n", + " | | | | | | | | | | | | | | | | | | \n", + "<s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD\n", + " (<s> <s> (ponct --) (adv Alors) (ponct ,) (VN (v dit)))\n", + " (NP-OBJ (det le) (nc roi))\n", + " (ponct ,)\n", + " (VN (cls-suj il) (v faut))\n", + " (Ssub-OBJ\n", + " (cs que)\n", + " (Sint\n", + " (VN (cls-suj vous) (adv ne) (clo-obj le) (v rendiez))\n", + " (adv pas))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 7\n", + " SENT \n", + " | \n", + " <s> \n", + " _______|_________________________________________________________ \n", + " <s> | | \n", + " _________________________________________________|____________________ | | \n", + " | | | | VN | | \n", + " | | | | _____________________________________________|_________________ | | \n", + " | | | | | | | | | | | COORD | | \n", + " | | | | | | | | | | | ______|_____ | | \n", + " | | AP-ATS | | | AP-ATS | | NP-OBJ | | VN | </s>\n", + " | | | | | | | | | ______|_____ | | _____|________ | | \n", + " | ponct adv ponct cls-suj v adj ponct cls-suj clo-obj v ponct cc cls-suj clr v vpp ponct </s>\n", + " | | | | | | | | | | | | | | | | | | | \n", + "<s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (AP-ATS (adv Non))\n", + " (ponct ,)\n", + " (VN\n", + " (cls-suj il)\n", + " (v est)\n", + " (AP-ATS (adj vrai))\n", + " (ponct ,)\n", + " (cls-suj je)\n", + " (NP-OBJ (clo-obj le) (v suis))\n", + " (ponct ,)\n", + " (COORD\n", + " (cc mais)\n", + " (VN (cls-suj il) (clr m') (v a) (vpp semblé)))))\n", + " (ponct ...)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 7\n", + " SENT \n", + " | \n", + " <s> \n", + " | \n", + " <s> \n", + " ____|____ \n", + " | | </s>\n", + " | | | \n", + " | ponct </s>\n", + " | | | \n", + "<s> -- </s>\n", + "\n", + "NLTK TREE (SENT (<s> (<s> <s> (ponct --) (</s> (</s> </s>)))))\n", + "Tree depth: 5\n", + " SENT \n", + " | \n", + " <s> \n", + " ___________________|________________________________ \n", + " Sint-MOD | Sint-MOD | | \n", + " _____________|______ | ______|_______ | | \n", + " | VN | VN NP-OBJ | </s>\n", + " | ______________|_____ | | | | | \n", + "<s> cls-suj v vpp ponct vinf adv ponct </s>\n", + " | | | | | | | | | \n", + "<s> Il a répondu : voilà tout . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD (<s> <s>) (VN (cls-suj Il) (v a) (vpp répondu)))\n", + " (ponct :)\n", + " (Sint-MOD (VN (vinf voilà)) (NP-OBJ (adv tout)))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + " SENT \n", + " | \n", + " <s> \n", + " | \n", + " Sint-MOD \n", + " _____________________________________|____________________ \n", + " | | | COORD \n", + " | | | __________________________|____________________ \n", + " | | | | | | | PP \n", + " | | | | | | | ______________|____ \n", + " | | | | | | | | VPinf \n", + " | | | | | | | | ____________|____ \n", + " | VN | | VN | AP-ATS | VN | </s>\n", + " | _____|________ | | ______|___ | | | | | | \n", + "<s> cls-suj adv adv v ponct cc cls-suj v adv adj p vinf ponct </s>\n", + " | | | | | | | | | | | | | | | \n", + "<s> Je ne puis dire , car il est bien facile à comprendre . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD\n", + " (<s> <s>)\n", + " (VN (cls-suj Je) (adv ne) (adv puis) (v dire))\n", + " (ponct ,)\n", + " (COORD\n", + " (cc car)\n", + " (VN (cls-suj il) (v est))\n", + " (adv bien)\n", + " (AP-ATS (adj facile))\n", + " (PP\n", + " (p à)\n", + " (VPinf (VN (vinf comprendre)) (ponct .) (</s> (</s> </s>))))))))\n", + "Tree depth: 8\n", + "-- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> \n", + "0\n" ] } ], @@ -1182,15 +1275,21 @@ "\n", "sents = readFile(file_conll)\n", "exepts = []\n", - "for sent in sents[:5]:\n", + "file_text = ''\n", + "for sent in sents[:10]:\n", " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", - " sent_str = ''.join(sent_lst)\n", - " print(sent_str)\n", + " triple_str = ''.join(sent_lst)\n", + " sent_text = ' '.join([word[0] for word in sent]) + ' '\n", + " file_text += sent_text\n", " try:\n", - " visualize_const_prediction(sent_str)\n", + " visualize_const_prediction(triple_str)\n", " except:\n", - " exepts.append(sent_str)\n", - "print(len(exepts))" + " exepts.append(triple_str)\n", + "print(file_text)\n", + "print(\"Number of sentences without a tree: \", len(exepts))\n", + "\n", + "for exept in exepts:\n", + " print(exept)" ] }, { @@ -1199,7 +1298,43 @@ "id": "729d7671-02be-443b-94ee-ddc79284aac4", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def prepare_file_to_evaluate(file_conll):\n", + " sents = readFile(file_conll)\n", + " file_text = ''\n", + " triple_strings = []\n", + " for sent in sents[:10]:\n", + " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", + " triple_str = ''.join(sent_lst)\n", + " sent_text = ' '.join([word[0] for word in sent]) + ' '\n", + " file_text += sent_text\n", + " triple_strings.append(triple_str)\n", + " return file_text, triple_strings\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cefe6712-ab53-4589-9fb8-8d4c3fb00378", + "metadata": {}, + "outputs": [], + "source": [ + "file_text, triple_strings = prepare_file_to_evaluate(file_conll)\n", + "\n", + "exepts = [] \n", + "\n", + "for triple_str in triple_strings:\n", + " try:\n", + " visualize_const_prediction(triple_str)\n", + " except:\n", + " exepts.append(triple_str)\n", + "print(\"Number of sentences without a tree: \", len(exepts))\n", + " \n", + "for exept in exepts:\n", + " print(exept)\n", + " \n" + ] } ], "metadata": { diff --git a/tania_scripts/tania-some-other-metrics.ipynb b/tania_scripts/tania-some-other-metrics.ipynb index a918ace..0197b08 100644 --- a/tania_scripts/tania-some-other-metrics.ipynb +++ b/tania_scripts/tania-some-other-metrics.ipynb @@ -1015,7 +1015,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 43, "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a", "metadata": {}, "outputs": [ @@ -1023,12 +1023,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "--\tponct\t<nul>@@<nul>\t0\n", - "Le\tdet\tNP@@<nul>\t0\n", - "parlement\tnc\t<nul>@@<nul>\t1\n", - "...\tponct\t<nul>@@SENT\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " SENT \n", @@ -1047,21 +1041,6 @@ " (ponct ...)\n", " (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t0\n", - "Vous\tpro\tVN@@<nul>\t0\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "dit\tv\t<nul>@@<nul>\t1\n", - "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "je\tcls-suj\tVN@@<nul>\t1\n", - "ne\tadv\t<nul>@@<nul>\t2\n", - "me\tv\t<nul>@@<nul>\t2\n", - "reconnais\tv\t<nul>@@<nul>\t2\n", - "pas\tadv\t<nul>@@<nul>\t2\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1086,14 +1065,6 @@ " (ponct .)\n", " (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t1\n", - "M.\tnpp\tNP-SUJ@@<nul>\t1\n", - "d'\tp\tNPP+@@<nul>\t2\n", - "Artagnan\tnpp\t<nul>@@<nul>\t3\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1117,11 +1088,6 @@ " (ponct .)\n", " (</s> (</s> </s>))))\n", "Tree depth: 6\n", - "<s>\t<s>\t<s>\t1\n", - "Acté\tnc\tNP-OBJ@@<nul>\t1\n", - "?\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1137,16 +1103,6 @@ "NLTK TREE (SENT\n", " (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n", "Tree depth: 5\n", - "<s>\t<s>\t<s>\t1\n", - "--\tponct\t<nul>@@<nul>\t1\n", - "Oui\tadj\tAP-ATS@@<nul>\t1\n", - ",\tponct\t<nul>@@<nul>\t1\n", - "répondit\tv\tVN@@<nul>\t1\n", - "le\tdet\tNP-OBJ@@Sint-MOD\t2\n", - "mousquetaire\tnc\t<nul>@@<nul>\t3\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\n", " SENT \n", " | \n", " <s> \n", @@ -1173,7 +1129,144 @@ " (NP-OBJ (det le) (nc mousquetaire))))\n", " (ponct .)\n", " (</s> (</s> </s>))))\n", - "Tree depth: 6\n" + "Tree depth: 6\n", + " SENT \n", + " | \n", + " <s> \n", + " ________|_____________________________________________________________ \n", + " Sint-MOD | | \n", + " _______________________________|_____________________________________ | | \n", + " | | | | Ssub-OBJ | | \n", + " | | | | _____________|________ | | \n", + " <s> | | | | Sint | | \n", + " __________|__________ | | | | ________|_____________ | | \n", + " | | | | VN NP-OBJ | VN | VN | | </s>\n", + " | | | | | ____|_____ | _____|___ | _______|________________ | | | \n", + " | ponct adv ponct v det nc ponct cls-suj v cs cls-suj adv clo-obj v adv ponct </s>\n", + " | | | | | | | | | | | | | | | | | | \n", + "<s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD\n", + " (<s> <s> (ponct --) (adv Alors) (ponct ,) (VN (v dit)))\n", + " (NP-OBJ (det le) (nc roi))\n", + " (ponct ,)\n", + " (VN (cls-suj il) (v faut))\n", + " (Ssub-OBJ\n", + " (cs que)\n", + " (Sint\n", + " (VN (cls-suj vous) (adv ne) (clo-obj le) (v rendiez))\n", + " (adv pas))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 7\n", + " SENT \n", + " | \n", + " <s> \n", + " _______|_________________________________________________________ \n", + " <s> | | \n", + " _________________________________________________|____________________ | | \n", + " | | | | VN | | \n", + " | | | | _____________________________________________|_________________ | | \n", + " | | | | | | | | | | | COORD | | \n", + " | | | | | | | | | | | ______|_____ | | \n", + " | | AP-ATS | | | AP-ATS | | NP-OBJ | | VN | </s>\n", + " | | | | | | | | | ______|_____ | | _____|________ | | \n", + " | ponct adv ponct cls-suj v adj ponct cls-suj clo-obj v ponct cc cls-suj clr v vpp ponct </s>\n", + " | | | | | | | | | | | | | | | | | | | \n", + "<s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (AP-ATS (adv Non))\n", + " (ponct ,)\n", + " (VN\n", + " (cls-suj il)\n", + " (v est)\n", + " (AP-ATS (adj vrai))\n", + " (ponct ,)\n", + " (cls-suj je)\n", + " (NP-OBJ (clo-obj le) (v suis))\n", + " (ponct ,)\n", + " (COORD\n", + " (cc mais)\n", + " (VN (cls-suj il) (clr m') (v a) (vpp semblé)))))\n", + " (ponct ...)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 7\n", + " SENT \n", + " | \n", + " <s> \n", + " | \n", + " <s> \n", + " ____|____ \n", + " | | </s>\n", + " | | | \n", + " | ponct </s>\n", + " | | | \n", + "<s> -- </s>\n", + "\n", + "NLTK TREE (SENT (<s> (<s> <s> (ponct --) (</s> (</s> </s>)))))\n", + "Tree depth: 5\n", + " SENT \n", + " | \n", + " <s> \n", + " ___________________|________________________________ \n", + " Sint-MOD | Sint-MOD | | \n", + " _____________|______ | ______|_______ | | \n", + " | VN | VN NP-OBJ | </s>\n", + " | ______________|_____ | | | | | \n", + "<s> cls-suj v vpp ponct vinf adv ponct </s>\n", + " | | | | | | | | | \n", + "<s> Il a répondu : voilà tout . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD (<s> <s>) (VN (cls-suj Il) (v a) (vpp répondu)))\n", + " (ponct :)\n", + " (Sint-MOD (VN (vinf voilà)) (NP-OBJ (adv tout)))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + " SENT \n", + " | \n", + " <s> \n", + " | \n", + " Sint-MOD \n", + " _____________________________________|____________________ \n", + " | | | COORD \n", + " | | | __________________________|____________________ \n", + " | | | | | | | PP \n", + " | | | | | | | ______________|____ \n", + " | | | | | | | | VPinf \n", + " | | | | | | | | ____________|____ \n", + " | VN | | VN | AP-ATS | VN | </s>\n", + " | _____|________ | | ______|___ | | | | | | \n", + "<s> cls-suj adv adv v ponct cc cls-suj v adv adj p vinf ponct </s>\n", + " | | | | | | | | | | | | | | | \n", + "<s> Je ne puis dire , car il est bien facile à comprendre . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (Sint-MOD\n", + " (<s> <s>)\n", + " (VN (cls-suj Je) (adv ne) (adv puis) (v dire))\n", + " (ponct ,)\n", + " (COORD\n", + " (cc car)\n", + " (VN (cls-suj il) (v est))\n", + " (adv bien)\n", + " (AP-ATS (adj facile))\n", + " (PP\n", + " (p à)\n", + " (VPinf (VN (vinf comprendre)) (ponct .) (</s> (</s> </s>))))))))\n", + "Tree depth: 8\n", + "-- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> \n", + "0\n" ] } ], @@ -1182,15 +1275,21 @@ "\n", "sents = readFile(file_conll)\n", "exepts = []\n", - "for sent in sents[:5]:\n", + "file_text = ''\n", + "for sent in sents[:10]:\n", " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", - " sent_str = ''.join(sent_lst)\n", - " print(sent_str)\n", + " triple_str = ''.join(sent_lst)\n", + " sent_text = ' '.join([word[0] for word in sent]) + ' '\n", + " file_text += sent_text\n", " try:\n", - " visualize_const_prediction(sent_str)\n", + " visualize_const_prediction(triple_str)\n", " except:\n", - " exepts.append(sent_str)\n", - "print(len(exepts))" + " exepts.append(triple_str)\n", + "print(file_text)\n", + "print(\"Number of sentences without a tree: \", len(exepts))\n", + "\n", + "for exept in exepts:\n", + " print(exept)" ] }, { @@ -1199,7 +1298,43 @@ "id": "729d7671-02be-443b-94ee-ddc79284aac4", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def prepare_file_to_evaluate(file_conll):\n", + " sents = readFile(file_conll)\n", + " file_text = ''\n", + " triple_strings = []\n", + " for sent in sents[:10]:\n", + " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", + " triple_str = ''.join(sent_lst)\n", + " sent_text = ' '.join([word[0] for word in sent]) + ' '\n", + " file_text += sent_text\n", + " triple_strings.append(triple_str)\n", + " return file_text, triple_strings\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cefe6712-ab53-4589-9fb8-8d4c3fb00378", + "metadata": {}, + "outputs": [], + "source": [ + "file_text, triple_strings = prepare_file_to_evaluate(file_conll)\n", + "\n", + "exepts = [] \n", + "\n", + "for triple_str in triple_strings:\n", + " try:\n", + " visualize_const_prediction(triple_str)\n", + " except:\n", + " exepts.append(triple_str)\n", + "print(\"Number of sentences without a tree: \", len(exepts))\n", + " \n", + "for exept in exepts:\n", + " print(exept)\n", + " \n" + ] } ], "metadata": { -- GitLab