From 6187b340c5d519b3745b426c85942d88d7dcca5a Mon Sep 17 00:00:00 2001 From: BLADIER Tatiana <tatiana.bladier@lis-lab.fr> Date: Fri, 16 May 2025 10:01:35 +0200 Subject: [PATCH] add readfile option --- ...in_loss-4.3-val_loss-4.9-checkpoint.pt_500 | 506 ++++++++++++++++++ .../tania-some-other-metrics-checkpoint.ipynb | 171 +++++- tania_scripts/tania-some-other-metrics.ipynb | 171 +++++- 3 files changed, 836 insertions(+), 12 deletions(-) create mode 100644 tania_scripts/.ipynb_checkpoints/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9-checkpoint.pt_500 diff --git a/tania_scripts/.ipynb_checkpoints/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9-checkpoint.pt_500 b/tania_scripts/.ipynb_checkpoints/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9-checkpoint.pt_500 new file mode 100644 index 0000000..0cc46f2 --- /dev/null +++ b/tania_scripts/.ipynb_checkpoints/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9-checkpoint.pt_500 @@ -0,0 +1,506 @@ +<s> ||| END OF PROMPT: ||| +[['<s>', '<s>', '<s>', '0']] + +-- ponct <nul>@@<nul> 0 +Le det NP@@<nul> 0 +parlement nc <nul>@@<nul> 1 +... ponct <nul>@@SENT 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 0 +Vous pro VN@@<nul> 0 +, ponct <nul>@@<nul> 1 +dit v <nul>@@<nul> 1 +Athos npp NP-OBJ@@Sint-MOD 1 +, ponct <nul>@@<nul> 1 +je cls-suj VN@@<nul> 1 +ne adv <nul>@@<nul> 2 +me v <nul>@@<nul> 2 +reconnais v <nul>@@<nul> 2 +pas adv <nul>@@<nul> 2 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +M. npp NP-SUJ@@<nul> 1 +d' p NPP+@@<nul> 2 +Artagnan npp <nul>@@<nul> 3 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +Acté nc NP-OBJ@@<nul> 1 +? ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +Oui adj AP-ATS@@<nul> 1 +, ponct <nul>@@<nul> 1 +répondit v VN@@<nul> 1 +le det NP-OBJ@@Sint-MOD 2 +mousquetaire nc <nul>@@<nul> 3 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +Alors adv <nul>@@<nul> 1 +, ponct <nul>@@<nul> 1 +dit v VN@@<nul> 1 +le det NP-OBJ@@Sint-MOD 1 +roi nc <nul>@@<nul> 2 +, ponct <nul>@@<nul> 1 +il cls-suj VN@@<nul> 1 +faut v <nul>@@<nul> 2 +que cs Ssub-OBJ@@<nul> 1 +vous cls-suj VN@@<nul> 2 +ne adv <nul>@@<nul> 3 +le clo-obj <nul>@@<nul> 3 +rendiez v <nul>@@<nul> 3 +pas adv <nul>@@Sint 3 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +Non adv AP-ATS@@<nul> 1 +, ponct <nul>@@<nul> 1 +il cls-suj VN@@<nul> 1 +est v <nul>@@<nul> 2 +vrai adj AP-ATS@@<nul> 2 +, ponct <nul>@@<nul> 2 +je cls-suj <nul>@@<nul> 2 +le clo-obj NP-OBJ@@<nul> 2 +suis v <nul>@@<nul> 3 +, ponct <nul>@@<nul> 2 +mais cc COORD@@<nul> 2 +il cls-suj VN@@<nul> 3 +m' clr <nul>@@<nul> 4 +a v <nul>@@<nul> 4 +semblé vpp <nul>@@<nul> 4 +... ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +</s> </s> </s> 1 +<s> <s> <s> 1 +Il cls-suj VN@@Sint-MOD 1 +a v <nul>@@<nul> 2 +répondu vpp <nul>@@<nul> 2 +: ponct <nul>@@<nul> 0 +voilà vinf VN@@<nul> 0 +tout adv NP-OBJ@@Sint-MOD 1 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 1 +Je cls-suj VN@@Sint-MOD 1 +ne adv <nul>@@<nul> 2 +puis adv <nul>@@<nul> 2 +dire v <nul>@@<nul> 2 +, ponct <nul>@@<nul> 1 +car cc COORD@@<nul> 1 +il cls-suj VN@@<nul> 2 +est v <nul>@@<nul> 3 +bien adv <nul>@@<nul> 2 +facile adj AP-ATS@@<nul> 2 +à p PP@@<nul> 2 +comprendre vinf VN@@<nul> 3 +. ponct <nul>@@VPinf 4 +</s> </s> </s> 4 +<s> <s> <s> 5 +-- ponct <nul>@@<nul> 5 +Mais cc COORD@@<nul> 5 +le det NP@@<nul> 6 +moins adv <nul>@@<nul> 7 +ne adv VN@@<nul> 7 +rendra v <nul>@@<nul> 8 +-t vinf VN@@VPpart 8 +-il cls-suj P+@@VPinf-OBJ 9 +donc adv <nul>@@<nul> 10 +? ponct <nul>@@<nul> 10 +</s> </s> </s> 10 +<s> <s> <s> 10 +-- ponct <nul>@@<nul> 5 +Non adv AP@@<nul> 5 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +J' cls-suj VN@@<nul> 0 +ai v <nul>@@<nul> 1 +vu vpp <nul>@@<nul> 1 +Athos npp NP-OBJ@@Sint-MOD 1 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Sans p PP-MOD@@<nul> 0 +doute nc NP@@<nul> 1 +, ponct <nul>@@<nul> 1 +mais cc COORD@@<nul> 1 +il cls-suj VN@@<nul> 2 +est v <nul>@@<nul> 3 +vrai adj AP-ATS@@<nul> 2 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Eh npp <nul>@@<nul> 0 +bien adv <nul>@@<nul> 0 +! ponct <nul>@@<nul> 0 +pardieu nc <nul>@@<nul> 0 +! ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +je cls-suj VN@@<nul> 0 +ne adv <nul>@@<nul> 1 +veux v <nul>@@<nul> 1 +pas adv <nul>@@VPinf-OBJ 1 +qu' adv Ssub-OBJ@@<nul> 1 +il cls-suj VN@@<nul> 2 +soit vs <nul>@@<nul> 3 +avec p PP-MOD@@Sint 3 +lui pro NP@@<nul> 4 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +le det NP-SUJ@@<nul> 0 +roi? nc <nul>@@<nul> 1 +pas adv AP@@<nul> 1 +Aramis adj <nul>@@<nul> 2 +? ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Oui adj <nul>@@<nul> 0 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Oui adj <nul>@@<nul> 0 +, ponct <nul>@@<nul> 0 +il cls-suj VN@@<nul> 0 +est v <nul>@@<nul> 1 +évêque adj AP-ATS@@<nul> 1 +, ponct <nul>@@Sint-MOD 1 +dit v VN@@<nul> 1 +Athos npp NP-SUJ@@Sint-MOD 2 +, ponct <nul>@@<nul> 3 +à p PP-MOD@@<nul> 3 +tous adj NP@@<nul> 4 +ses det <nul>@@<nul> 5 +membres nc <nul>@@<nul> 5 +nous cls VN@@<nul> 4 +autres adj <nul>@@<nul> 5 +amis vpp <nul>@@<nul> 5 +, ponct <nul>@@Sint 5 +et cc COORD@@<nul> 3 +nous cls-suj VN@@<nul> 4 +aurons v <nul>@@<nul> 5 +l' det NP-OBJ@@<nul> 4 +honneur nc <nul>@@<nul> 5 +de p PP@@<nul> 5 +nous pro NP@@<nul> 6 +défendre vinf <nul>@@<nul> 7 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Que prorel <nul>@@<nul> 0 +diable nc <nul>@@<nul> 0 +ne adv VN@@<nul> 0 +le clo-obj <nul>@@<nul> 1 +voulez v <nul>@@<nul> 1 +-vous clo-a_obj <nul>@@VPinf-OBJ 1 +pas adv <nul>@@<nul> 1 +? ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Aramis npp NP-MOD@@<nul> 0 +, ponct <nul>@@<nul> 1 +qu' cs NP-MOD@@<nul> 1 +il cls-suj VN@@Srel 2 +vous clo-a_obj <nul>@@<nul> 3 +a v <nul>@@<nul> 3 +dit vpp <nul>@@<nul> 3 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Mais cc COORD@@<nul> 0 +, ponct <nul>@@<nul> 1 +dit v VN@@<nul> 1 +d' p PP-DE_OBJ@@<nul> 1 +Artagnan npp NP@@<nul> 2 +, ponct <nul>@@<nul> 1 +je cls-suj VN@@<nul> 1 +ne adv <nul>@@<nul> 2 +l' clo-obj <nul>@@<nul> 2 +ai v <nul>@@<nul> 2 +point vpp <nul>@@<nul> 2 +aperçu vpp <nul>@@<nul> 2 +, ponct <nul>@@Sint-MOD 2 +je cls-suj VN@@<nul> 1 +ne adv <nul>@@<nul> 2 +le clo-obj <nul>@@<nul> 2 +sais v <nul>@@<nul> 2 +. ponct <nul>@@<nul> 2 +</s> </s> </s> 2 +<s> <s> <s> 2 +-- ponct <nul>@@<nul> 2 +Mordaunt npp <nul>@@<nul> 2 +, ponct <nul>@@<nul> 2 +qu' cs Ssub-OBJ@@<nul> 2 +il cls-suj VN@@<nul> 3 +faut v <nul>@@<nul> 4 +le det NP-OBJ@@Sint 4 +repos nc <nul>@@<nul> 5 +, ponct <nul>@@<nul> 2 +il cls-suj VN@@<nul> 2 +nous clo-a_obj <nul>@@<nul> 3 +rejoindrait v <nul>@@<nul> 3 +sur p PP-P_OBJ@@Sint 3 +le det NP@@<nul> 4 +même adj <nul>@@<nul> 5 +champ nc <nul>@@<nul> 5 +de p PP@@<nul> 5 +bataille nc NP@@<nul> 6 +. ponct <nul>@@<nul> 2 +</s> </s> </s> 2 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +Est nc NP-OBJ@@<nul> 1 +-ce det NP@@<nul> 2 +lui pro <nul>@@<nul> 3 +? ponct <nul>@@<nul> 1 +demanda v VN@@<nul> 1 +Aramis npp NP-OBJ@@Sint-MOD 2 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Porthos npp <nul>@@<nul> 0 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Mordaunt npp <nul>@@<nul> 0 +, ponct <nul>@@<nul> 0 +dit v VN@@<nul> 0 +Athos npp NP-SUJ@@Sint-MOD 1 +, ponct <nul>@@<nul> 2 +il cls-suj VN@@<nul> 2 +est v <nul>@@<nul> 3 +un det NP-OBJ@@Sint 3 +homme nc <nul>@@<nul> 4 +le det NP-OBJ@@<nul> 4 +premier adj <nul>@@<nul> 5 +, ponct <nul>@@<nul> 5 +un det NP@@<nul> 5 +homme nc <nul>@@<nul> 6 +qui prorel NP-SUJ@@<nul> 6 +s' clr VN@@Srel 7 +est v <nul>@@<nul> 8 +passé vpp <nul>@@<nul> 8 +. ponct <nul>@@<nul> 5 +</s> </s> </s> 5 +<s> <s> <s> 6 +-- ponct <nul>@@<nul> 5 +il cls-suj VN@@<nul> 5 +est v <nul>@@<nul> 6 +toujours adv <nul>@@VPpart 6 +le det NP-OBJ@@<nul> 6 +moins adv <nul>@@<nul> 7 +de p PP@@<nul> 7 +le det NP@@<nul> 8 +monde nc <nul>@@<nul> 9 +, ponct <nul>@@<nul> 5 +mais cc COORD@@<nul> 5 +il cls-suj VN@@<nul> 6 +ne adv <nul>@@<nul> 7 +nous clo-a_obj <nul>@@<nul> 7 +aurait v <nul>@@<nul> 7 +-il pro <nul>@@<nul> 7 +pas adv <nul>@@<nul> 7 +de p PP@@<nul> 4 +le det NP@@<nul> 5 +monde nc <nul>@@<nul> 6 +? ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +nous cls-suj VN@@<nul> 0 +l' clo-obj <nul>@@<nul> 1 +avons v <nul>@@<nul> 1 +dit vpp <nul>@@<nul> 1 +, ponct <nul>@@Sint 1 +nous cls-suj VN@@<nul> 1 +l' clo-obj <nul>@@<nul> 2 +avons v <nul>@@<nul> 2 +dit vpp <nul>@@<nul> 2 +, ponct <nul>@@Sint-MOD 2 +nous cls-suj VN@@<nul> 1 +l' clo-obj <nul>@@<nul> 2 +avons v <nul>@@<nul> 2 +dit vpp <nul>@@<nul> 2 +, ponct <nul>@@<nul> 1 +les det NP-OBJ@@<nul> 1 +avons nc <nul>@@<nul> 2 +vu vpp VPpart@@<nul> 2 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Aramis npp <nul>@@<nul> 0 +! ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Je cls-suj VN@@<nul> 0 +m' clr <nul>@@<nul> 1 +en clo <nul>@@<nul> 1 +doutais v <nul>@@<nul> 1 +. ponct <nul>@@<nul> 1 +</s> </s> </s> 1 +<s> <s> <s> 1 +-- ponct <nul>@@<nul> 1 +Aramis npp AP-ATS@@<nul> 1 +, ponct <nul>@@<nul> 1 +je cls-suj VN@@<nul> 1 +le clo-obj <nul>@@<nul> 2 +sais v <nul>@@<nul> 2 +, ponct <nul>@@<nul> 1 +mais cc COORD@@<nul> 1 +il cls-suj VN@@<nul> 2 +venait v <nul>@@<nul> 3 +me det <nul>@@<nul> 2 +donner vinf VN@@<nul> 2 +de p PP-DE_OBJ@@VPinf-OBJ 3 +l' det NP@@<nul> 4 +hospitalité nc <nul>@@<nul> 5 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +-- ponct <nul>@@<nul> 0 +Oui adj <nul>@@<nul> 0 +, ponct <nul>@@<nul> 0 +je cls-suj VN@@<nul> 0 +l' clo-obj <nul>@@<nul> 1 +ignore v <nul>@@<nul> 1 +; ponct <nul>@@Sint-MOD 1 +il cls-suj VN@@<nul> 1 +s' clr <nul>@@<nul> 2 +agit v <nul>@@<nul> 2 +d' p PP-DE_OBJ@@Sint-MOD 2 +être vinf VN@@<nul> 3 +élevé vpp <nul>@@<nul> 4 +, ponct <nul>@@VPinf 4 +moi adj <nul>@@<nul> 4 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Athos npp <nul>@@<nul> 0 +et cc COORD@@<nul> 0 +Porthos npp NP@@<nul> 1 +se clr VN@@<nul> 1 +regardaient v <nul>@@<nul> 2 +avec p PP-MOD@@<nul> 1 +politesse nc NP@@<nul> 2 +avec p PP-MOD@@<nul> 1 +étonnement nc NP@@<nul> 2 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Porthos npp <nul>@@<nul> 0 +, ponct <nul>@@<nul> 0 +qui prorel NP-SUJ@@<nul> 0 +n' adv VN@@Srel 1 +avait v <nul>@@<nul> 2 +pas adv <nul>@@<nul> 2 +d' det PP-DE_OBJ@@<nul> 1 +esprit nc NP@@<nul> 2 +, ponct <nul>@@<nul> 1 +et cc COORD@@<nul> 1 +l' det NP-SUJ@@<nul> 2 +avait v <nul>@@<nul> 3 +vu vpp VPpart@@<nul> 3 +le det NP@@<nul> 4 +roi nc <nul>@@<nul> 5 +, ponct <nul>@@<nul> 3 +il cls-suj VN@@<nul> 3 +était v <nul>@@<nul> 4 +comme p PP-MOD@@<nul> 4 +ami nc <nul>@@<nul> 4 +de p PP-DE_OBJ@@VPinf 4 +son det NP@@<nul> 5 +beau-frère nc <nul>@@<nul> 6 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Athos npp <nul>@@<nul> 0 +était v <nul>@@<nul> 0 +debout adj <nul>@@<nul> 0 +, ponct <nul>@@<nul> 0 +comme cs Ssub-MOD@@<nul> 0 +le det VN@@<nul> 1 +regardait v <nul>@@<nul> 2 +de p PP-DE_OBJ@@Sint 2 +le det NP@@<nul> 3 +côté nc <nul>@@<nul> 4 +de p PP@@<nul> 4 +lui pro NP@@<nul> 5 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Aramis npp <nul>@@<nul> 0 +était v <nul>@@<nul> 0 +à p <nul>@@<nul> 0 +son det NP@@<nul> 0 +poste nc <nul>@@<nul> 1 +, ponct <nul>@@<nul> 0 +il cls-suj VN@@<nul> 0 +était v <nul>@@<nul> 1 +tombé vpp <nul>@@<nul> 1 +de p PP-DE_OBJ@@Sint-MOD 1 +ses det NP@@<nul> 2 +bras nc <nul>@@<nul> 3 +. ponct <nul>@@<nul> 0 +</s> </s> </s> 0 +<s> <s> <s> 0 +Athos npp <nul>@@<nul> 0 +l' det NP-MOD@@<nul> 0 +attendait nc <nul>@@<nul> 1 +avec p PP-MOD@@<nul> 1 +une det NP@@<nul> 2 +inquiétude nc <nul>@@<nul> 3 +singulière adj AP@@<nul> 3 +; ponct <nul>@@<nul> 0 +il cls-suj VN@@<nul> 0 +était v <nul>@@<nul> 1 +debout adj AP-ATS@@<nul> 1 +plutôt adv <nul>@@Sint-MOD 1 +silencieusement adv <nul>@@<nul> 1 +que cs Ssub-OBJ@@<nul> 1 +le det NP-SUJ@@<nul> 2 +poignard nc <nul>@@<nul> 3 +sur p PP@@<nul> 3 +le det NP@@<nul> 4 +front nc <nul>@@<nul> 5 +. ponct <nul>@@<nul> 3 +</s> </s> </s> 3 +<s> <s> <s> 4 +Athos npp <nul>@@<nul> 4 +l' det NP@@<nul> 4 +attendait v <nul>@@<nul> 5 +avec p PP@@<nul> 4 +impatience nc NP@@<nul> 5 +, ponct <nul>@@Sint 3 +il cls-suj VN@@<nul> 3 +fut v <nul>@@<nul> 4 +saisi vpp <nul>@@<nul> 4 +d' p PP-DE_OBJ@@<nul> 3 +un det NP@@<nul> 4 +silence nc <nul>@@<nul> 5 +. ponct <nul>@@<nul> 0 + + +<s> -- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> <s> -- Mais le moins ne rendra -t -il donc ? </s> <s> -- Non . </s> <s> -- </s> <s> J' ai vu Athos . </s> <s> -- Sans doute , mais il est vrai . </s> <s> -- Eh bien ! pardieu ! </s> <s> je ne veux pas qu' il soit avec lui . </s> <s> -- le roi? pas Aramis ? </s> <s> -- Oui . </s> <s> -- Oui , il est évêque , dit Athos , à tous ses membres nous autres amis , et nous aurons l' honneur de nous défendre . </s> <s> -- Que diable ne le voulez -vous pas ? </s> <s> -- Aramis , qu' il vous a dit . </s> <s> -- Mais , dit d' Artagnan , je ne l' ai point aperçu , je ne le sais . </s> <s> -- Mordaunt , qu' il faut le repos , il nous rejoindrait sur le même champ de bataille . </s> <s> -- Est -ce lui ? demanda Aramis . </s> <s> Porthos . </s> <s> -- Mordaunt , dit Athos , il est un homme le premier , un homme qui s' est passé . </s> <s> -- il est toujours le moins de le monde , mais il ne nous aurait -il pas de le monde ? </s> <s> -- nous l' avons dit , nous l' avons dit , nous l' avons dit , les avons vu . </s> <s> -- Aramis ! </s> <s> Je m' en doutais . </s> <s> -- Aramis , je le sais , mais il venait me donner de l' hospitalité . </s> <s> -- Oui , je l' ignore ; il s' agit d' être élevé , moi . </s> <s> Athos et Porthos se regardaient avec politesse avec étonnement . </s> <s> Porthos , qui n' avait pas d' esprit , et l' avait vu le roi , il était comme ami de son beau-frère . </s> <s> Athos était debout , comme le regardait de le côté de lui . </s> <s> Aramis était à son poste , il était tombé de ses bras . </s> <s> Athos l' attendait avec une inquiétude singulière ; il était debout plutôt silencieusement que le poignard sur le front . </s> <s> Athos l' attendait avec impatience , il fut saisi d' un silence . \ No newline at end of file diff --git a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb index 478eb29..a918ace 100644 --- a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb +++ b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb @@ -987,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 31, "id": "bc51ab44-6885-45cc-bad2-6a43a7791fdb", "metadata": {}, "outputs": [], @@ -1004,10 +1004,10 @@ " token = cols[0]\n", " \n", " if token == '<s>':\n", - " current_sentence = []\n", + " current_sentence = [cols]\n", " elif token == '</s>':\n", " if current_sentence:\n", - " sentences.append(current_sentence)\n", + " sentences.append(current_sentence + [cols])\n", " else:\n", " current_sentence.append(cols)\n", " return sentences" @@ -1015,7 +1015,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 38, "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a", "metadata": {}, "outputs": [ @@ -1023,7 +1023,157 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[['--', 'ponct', '<nul>@@<nul>', '0'], ['Le', 'det', 'NP@@<nul>', '0'], ['parlement', 'nc', '<nul>@@<nul>', '1'], ['...', 'ponct', '<nul>@@SENT', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Vous', 'pro', 'VN@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['me', 'v', '<nul>@@<nul>', '2'], ['reconnais', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['M.', 'npp', 'NP-SUJ@@<nul>', '1'], [\"d'\", 'p', 'NPP+@@<nul>', '2'], ['Artagnan', 'npp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Acté', 'nc', 'NP-OBJ@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Oui', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['répondit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '2'], ['mousquetaire', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Alors', 'adv', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '1'], ['roi', 'nc', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['faut', 'v', '<nul>@@<nul>', '2'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['vous', 'cls-suj', 'VN@@<nul>', '2'], ['ne', 'adv', '<nul>@@<nul>', '3'], ['le', 'clo-obj', '<nul>@@<nul>', '3'], ['rendiez', 'v', '<nul>@@<nul>', '3'], ['pas', 'adv', '<nul>@@Sint', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Non', 'adv', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['est', 'v', '<nul>@@<nul>', '2'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], ['je', 'cls-suj', '<nul>@@<nul>', '2'], ['le', 'clo-obj', 'NP-OBJ@@<nul>', '2'], ['suis', 'v', '<nul>@@<nul>', '3'], [',', 'ponct', '<nul>@@<nul>', '2'], ['mais', 'cc', 'COORD@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], [\"m'\", 'clr', '<nul>@@<nul>', '4'], ['a', 'v', '<nul>@@<nul>', '4'], ['semblé', 'vpp', '<nul>@@<nul>', '4'], ['...', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1']], [['Il', 'cls-suj', 'VN@@Sint-MOD', '1'], ['a', 'v', '<nul>@@<nul>', '2'], ['répondu', 'vpp', '<nul>@@<nul>', '2'], [':', 'ponct', '<nul>@@<nul>', '0'], ['voilà', 'vinf', 'VN@@<nul>', '0'], ['tout', 'adv', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@Sint-MOD', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['puis', 'adv', '<nul>@@<nul>', '2'], ['dire', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['car', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['bien', 'adv', '<nul>@@<nul>', '2'], ['facile', 'adj', 'AP-ATS@@<nul>', '2'], ['à', 'p', 'PP@@<nul>', '2'], ['comprendre', 'vinf', 'VN@@<nul>', '3'], ['.', 'ponct', '<nul>@@VPinf', '4']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Mais', 'cc', 'COORD@@<nul>', '5'], ['le', 'det', 'NP@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['ne', 'adv', 'VN@@<nul>', '7'], ['rendra', 'v', '<nul>@@<nul>', '8'], ['-t', 'vinf', 'VN@@VPpart', '8'], ['-il', 'cls-suj', 'P+@@VPinf-OBJ', '9'], ['donc', 'adv', '<nul>@@<nul>', '10'], ['?', 'ponct', '<nul>@@<nul>', '10']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Non', 'adv', 'AP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0']], [[\"J'\", 'cls-suj', 'VN@@<nul>', '0'], ['ai', 'v', '<nul>@@<nul>', '1'], ['vu', 'vpp', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Sans', 'p', 'PP-MOD@@<nul>', '0'], ['doute', 'nc', 'NP@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Eh', 'npp', '<nul>@@<nul>', '0'], ['bien', 'adv', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0'], ['pardieu', 'nc', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['je', 'cls-suj', 'VN@@<nul>', '0'], ['ne', 'adv', '<nul>@@<nul>', '1'], ['veux', 'v', '<nul>@@<nul>', '1'], ['pas', 'adv', '<nul>@@VPinf-OBJ', '1'], [\"qu'\", 'adv', 'Ssub-OBJ@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['soit', 'vs', '<nul>@@<nul>', '3'], ['avec', 'p', 'PP-MOD@@Sint', '3'], ['lui', 'pro', 'NP@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['le', 'det', 'NP-SUJ@@<nul>', '0'], ['roi?', 'nc', '<nul>@@<nul>', '1'], ['pas', 'adv', 'AP@@<nul>', '1'], ['Aramis', 'adj', '<nul>@@<nul>', '2'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['est', 'v', '<nul>@@<nul>', '1'], ['évêque', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint-MOD', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '2'], [',', 'ponct', '<nul>@@<nul>', '3'], ['à', 'p', 'PP-MOD@@<nul>', '3'], ['tous', 'adj', 'NP@@<nul>', '4'], ['ses', 'det', '<nul>@@<nul>', '5'], ['membres', 'nc', '<nul>@@<nul>', '5'], ['nous', 'cls', 'VN@@<nul>', '4'], ['autres', 'adj', '<nul>@@<nul>', '5'], ['amis', 'vpp', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@Sint', '5'], ['et', 'cc', 'COORD@@<nul>', '3'], ['nous', 'cls-suj', 'VN@@<nul>', '4'], ['aurons', 'v', '<nul>@@<nul>', '5'], [\"l'\", 'det', 'NP-OBJ@@<nul>', '4'], ['honneur', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['nous', 'pro', 'NP@@<nul>', '6'], ['défendre', 'vinf', '<nul>@@<nul>', '7'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Que', 'prorel', '<nul>@@<nul>', '0'], ['diable', 'nc', '<nul>@@<nul>', '0'], ['ne', 'adv', 'VN@@<nul>', '0'], ['le', 'clo-obj', '<nul>@@<nul>', '1'], ['voulez', 'v', '<nul>@@<nul>', '1'], ['-vous', 'clo-a_obj', '<nul>@@VPinf-OBJ', '1'], ['pas', 'adv', '<nul>@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', 'NP-MOD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], [\"qu'\", 'cs', 'NP-MOD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@Srel', '2'], ['vous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['a', 'v', '<nul>@@<nul>', '3'], ['dit', 'vpp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mais', 'cc', 'COORD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], [\"d'\", 'p', 'PP-DE_OBJ@@<nul>', '1'], ['Artagnan', 'npp', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['ai', 'v', '<nul>@@<nul>', '2'], ['point', 'vpp', '<nul>@@<nul>', '2'], ['aperçu', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '2'], ['Mordaunt', 'npp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], [\"qu'\", 'cs', 'Ssub-OBJ@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['faut', 'v', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@Sint', '4'], ['repos', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['rejoindrait', 'v', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP-P_OBJ@@Sint', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['même', 'adj', '<nul>@@<nul>', '5'], ['champ', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['bataille', 'nc', 'NP@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Est', 'nc', 'NP-OBJ@@<nul>', '1'], ['-ce', 'det', 'NP@@<nul>', '2'], ['lui', 'pro', '<nul>@@<nul>', '3'], ['?', 'ponct', '<nul>@@<nul>', '1'], ['demanda', 'v', 'VN@@<nul>', '1'], ['Aramis', 'npp', 'NP-OBJ@@Sint-MOD', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mordaunt', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['dit', 'v', 'VN@@<nul>', '0'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['un', 'det', 'NP-OBJ@@Sint', '3'], ['homme', 'nc', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@<nul>', '4'], ['premier', 'adj', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '5'], ['un', 'det', 'NP@@<nul>', '5'], ['homme', 'nc', '<nul>@@<nul>', '6'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '6'], [\"s'\", 'clr', 'VN@@Srel', '7'], ['est', 'v', '<nul>@@<nul>', '8'], ['passé', 'vpp', '<nul>@@<nul>', '8'], ['.', 'ponct', '<nul>@@<nul>', '5']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '5'], ['est', 'v', '<nul>@@<nul>', '6'], ['toujours', 'adv', '<nul>@@VPpart', '6'], ['le', 'det', 'NP-OBJ@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '7'], ['le', 'det', 'NP@@<nul>', '8'], ['monde', 'nc', '<nul>@@<nul>', '9'], [',', 'ponct', '<nul>@@<nul>', '5'], ['mais', 'cc', 'COORD@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '6'], ['ne', 'adv', '<nul>@@<nul>', '7'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '7'], ['aurait', 'v', '<nul>@@<nul>', '7'], ['-il', 'pro', '<nul>@@<nul>', '7'], ['pas', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '4'], ['le', 'det', 'NP@@<nul>', '5'], ['monde', 'nc', '<nul>@@<nul>', '6'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['nous', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['avons', 'v', '<nul>@@<nul>', '1'], ['dit', 'vpp', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint', '1'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['les', 'det', 'NP-OBJ@@<nul>', '1'], ['avons', 'nc', '<nul>@@<nul>', '2'], ['vu', 'vpp', 'VPpart@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@<nul>', '0'], [\"m'\", 'clr', '<nul>@@<nul>', '1'], ['en', 'clo', '<nul>@@<nul>', '1'], ['doutais', 'v', '<nul>@@<nul>', '1'], ['.', 'ponct', '<nul>@@<nul>', '1']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Aramis', 'npp', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['venait', 'v', '<nul>@@<nul>', '3'], ['me', 'det', '<nul>@@<nul>', '2'], ['donner', 'vinf', 'VN@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@VPinf-OBJ', '3'], [\"l'\", 'det', 'NP@@<nul>', '4'], ['hospitalité', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['je', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['ignore', 'v', '<nul>@@<nul>', '1'], [';', 'ponct', '<nul>@@Sint-MOD', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], [\"s'\", 'clr', '<nul>@@<nul>', '2'], ['agit', 'v', '<nul>@@<nul>', '2'], [\"d'\", 'p', 'PP-DE_OBJ@@Sint-MOD', '2'], ['être', 'vinf', 'VN@@<nul>', '3'], ['élevé', 'vpp', '<nul>@@<nul>', '4'], [',', 'ponct', '<nul>@@VPinf', '4'], ['moi', 'adj', '<nul>@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['et', 'cc', 'COORD@@<nul>', '0'], ['Porthos', 'npp', 'NP@@<nul>', '1'], ['se', 'clr', 'VN@@<nul>', '1'], ['regardaient', 'v', '<nul>@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['politesse', 'nc', 'NP@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['étonnement', 'nc', 'NP@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '0'], [\"n'\", 'adv', 'VN@@Srel', '1'], ['avait', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], [\"d'\", 'det', 'PP-DE_OBJ@@<nul>', '1'], ['esprit', 'nc', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['et', 'cc', 'COORD@@<nul>', '1'], [\"l'\", 'det', 'NP-SUJ@@<nul>', '2'], ['avait', 'v', '<nul>@@<nul>', '3'], ['vu', 'vpp', 'VPpart@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['roi', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '3'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['était', 'v', '<nul>@@<nul>', '4'], ['comme', 'p', 'PP-MOD@@<nul>', '4'], ['ami', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP-DE_OBJ@@VPinf', '4'], ['son', 'det', 'NP@@<nul>', '5'], ['beau-frère', 'nc', '<nul>@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['debout', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['comme', 'cs', 'Ssub-MOD@@<nul>', '0'], ['le', 'det', 'VN@@<nul>', '1'], ['regardait', 'v', '<nul>@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@Sint', '2'], ['le', 'det', 'NP@@<nul>', '3'], ['côté', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP@@<nul>', '4'], ['lui', 'pro', 'NP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Aramis', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['à', 'p', '<nul>@@<nul>', '0'], ['son', 'det', 'NP@@<nul>', '0'], ['poste', 'nc', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['tombé', 'vpp', '<nul>@@<nul>', '1'], ['de', 'p', 'PP-DE_OBJ@@Sint-MOD', '1'], ['ses', 'det', 'NP@@<nul>', '2'], ['bras', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], [\"l'\", 'det', 'NP-MOD@@<nul>', '0'], ['attendait', 'nc', '<nul>@@<nul>', '1'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['une', 'det', 'NP@@<nul>', '2'], ['inquiétude', 'nc', '<nul>@@<nul>', '3'], ['singulière', 'adj', 'AP@@<nul>', '3'], [';', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['debout', 'adj', 'AP-ATS@@<nul>', '1'], ['plutôt', 'adv', '<nul>@@Sint-MOD', '1'], ['silencieusement', 'adv', '<nul>@@<nul>', '1'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['le', 'det', 'NP-SUJ@@<nul>', '2'], ['poignard', 'nc', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['front', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '3']]]\n" + "--\tponct\t<nul>@@<nul>\t0\n", + "Le\tdet\tNP@@<nul>\t0\n", + "parlement\tnc\t<nul>@@<nul>\t1\n", + "...\tponct\t<nul>@@SENT\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " SENT \n", + " ___________|____________ \n", + " ponct | | \n", + " ____|____ | | \n", + " | NP | </s>\n", + " | ____|______ | | \n", + " | det nc ponct </s>\n", + " | | | | | \n", + " -- Le parlement ... </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (SENT\n", + " (ponct -- (NP (det Le) (nc parlement)))\n", + " (ponct ...)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t0\n", + "Vous\tpro\tVN@@<nul>\t0\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "dit\tv\t<nul>@@<nul>\t1\n", + "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "je\tcls-suj\tVN@@<nul>\t1\n", + "ne\tadv\t<nul>@@<nul>\t2\n", + "me\tv\t<nul>@@<nul>\t2\n", + "reconnais\tv\t<nul>@@<nul>\t2\n", + "pas\tadv\t<nul>@@<nul>\t2\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " _________________________________|__________________________________________ \n", + " | | Sint-MOD | | \n", + " | | _________________|__________________ | | \n", + " | | VN NP-OBJ | VN | </s>\n", + " | | _____|____ | | _________|_____________ | | \n", + "<s> ponct pro ponct v npp ponct cls-suj adv v v adv ponct </s>\n", + " | | | | | | | | | | | | | | \n", + "<s> -- Vous , dit Athos , je ne me reconnais pas . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s> <s>)\n", + " (ponct --)\n", + " (Sint-MOD\n", + " (VN (pro Vous) (ponct ,) (v dit))\n", + " (NP-OBJ (npp Athos))\n", + " (ponct ,)\n", + " (VN (cls-suj je) (adv ne) (v me) (v reconnais) (adv pas)))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t1\n", + "M.\tnpp\tNP-SUJ@@<nul>\t1\n", + "d'\tp\tNPP+@@<nul>\t2\n", + "Artagnan\tnpp\t<nul>@@<nul>\t3\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ____|_________________________ \n", + " <s> | | \n", + " _________|____ | | \n", + " | | NP-SUJ | | \n", + " | | ____|_____ | | \n", + " | | | NPP+ | </s>\n", + " | | | _____|______ | | \n", + " | ponct npp p npp ponct </s>\n", + " | | | | | | | \n", + "<s> -- M. d' Artagnan . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (NP-SUJ (npp M.) (NPP+ (p d') (npp Artagnan))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 6\n", + "<s>\t<s>\t<s>\t1\n", + "Acté\tnc\tNP-OBJ@@<nul>\t1\n", + "?\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ____|___________ \n", + " <s> | | \n", + " ___|____ | | \n", + " | NP-OBJ | </s>\n", + " | | | | \n", + " | nc ponct </s>\n", + " | | | | \n", + "<s> Acté ? </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t1\n", + "Oui\tadj\tAP-ATS@@<nul>\t1\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "répondit\tv\tVN@@<nul>\t1\n", + "le\tdet\tNP-OBJ@@Sint-MOD\t2\n", + "mousquetaire\tnc\t<nul>@@<nul>\t3\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ______|_________________________________________ \n", + " <s> | | \n", + " _________________|_______________ | | \n", + " | | | | Sint-MOD | | \n", + " | | | | ________|_______ | | \n", + " | | AP-ATS | VN NP-OBJ | </s>\n", + " | | | | | _______|_________ | | \n", + " | ponct adj ponct v det nc ponct </s>\n", + " | | | | | | | | | \n", + "<s> -- Oui , répondit le mousquetaire . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (AP-ATS (adj Oui))\n", + " (ponct ,)\n", + " (Sint-MOD\n", + " (VN (v répondit))\n", + " (NP-OBJ (det le) (nc mousquetaire))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 6\n" ] } ], @@ -1031,7 +1181,16 @@ "file_conll = \"sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500\"\n", "\n", "sents = readFile(file_conll)\n", - "print(sents)" + "exepts = []\n", + "for sent in sents[:5]:\n", + " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", + " sent_str = ''.join(sent_lst)\n", + " print(sent_str)\n", + " try:\n", + " visualize_const_prediction(sent_str)\n", + " except:\n", + " exepts.append(sent_str)\n", + "print(len(exepts))" ] }, { diff --git a/tania_scripts/tania-some-other-metrics.ipynb b/tania_scripts/tania-some-other-metrics.ipynb index 478eb29..a918ace 100644 --- a/tania_scripts/tania-some-other-metrics.ipynb +++ b/tania_scripts/tania-some-other-metrics.ipynb @@ -987,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 31, "id": "bc51ab44-6885-45cc-bad2-6a43a7791fdb", "metadata": {}, "outputs": [], @@ -1004,10 +1004,10 @@ " token = cols[0]\n", " \n", " if token == '<s>':\n", - " current_sentence = []\n", + " current_sentence = [cols]\n", " elif token == '</s>':\n", " if current_sentence:\n", - " sentences.append(current_sentence)\n", + " sentences.append(current_sentence + [cols])\n", " else:\n", " current_sentence.append(cols)\n", " return sentences" @@ -1015,7 +1015,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 38, "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a", "metadata": {}, "outputs": [ @@ -1023,7 +1023,157 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[['--', 'ponct', '<nul>@@<nul>', '0'], ['Le', 'det', 'NP@@<nul>', '0'], ['parlement', 'nc', '<nul>@@<nul>', '1'], ['...', 'ponct', '<nul>@@SENT', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Vous', 'pro', 'VN@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['me', 'v', '<nul>@@<nul>', '2'], ['reconnais', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['M.', 'npp', 'NP-SUJ@@<nul>', '1'], [\"d'\", 'p', 'NPP+@@<nul>', '2'], ['Artagnan', 'npp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Acté', 'nc', 'NP-OBJ@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Oui', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['répondit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '2'], ['mousquetaire', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Alors', 'adv', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '1'], ['roi', 'nc', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['faut', 'v', '<nul>@@<nul>', '2'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['vous', 'cls-suj', 'VN@@<nul>', '2'], ['ne', 'adv', '<nul>@@<nul>', '3'], ['le', 'clo-obj', '<nul>@@<nul>', '3'], ['rendiez', 'v', '<nul>@@<nul>', '3'], ['pas', 'adv', '<nul>@@Sint', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Non', 'adv', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['est', 'v', '<nul>@@<nul>', '2'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], ['je', 'cls-suj', '<nul>@@<nul>', '2'], ['le', 'clo-obj', 'NP-OBJ@@<nul>', '2'], ['suis', 'v', '<nul>@@<nul>', '3'], [',', 'ponct', '<nul>@@<nul>', '2'], ['mais', 'cc', 'COORD@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], [\"m'\", 'clr', '<nul>@@<nul>', '4'], ['a', 'v', '<nul>@@<nul>', '4'], ['semblé', 'vpp', '<nul>@@<nul>', '4'], ['...', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1']], [['Il', 'cls-suj', 'VN@@Sint-MOD', '1'], ['a', 'v', '<nul>@@<nul>', '2'], ['répondu', 'vpp', '<nul>@@<nul>', '2'], [':', 'ponct', '<nul>@@<nul>', '0'], ['voilà', 'vinf', 'VN@@<nul>', '0'], ['tout', 'adv', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@Sint-MOD', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['puis', 'adv', '<nul>@@<nul>', '2'], ['dire', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['car', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['bien', 'adv', '<nul>@@<nul>', '2'], ['facile', 'adj', 'AP-ATS@@<nul>', '2'], ['à', 'p', 'PP@@<nul>', '2'], ['comprendre', 'vinf', 'VN@@<nul>', '3'], ['.', 'ponct', '<nul>@@VPinf', '4']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Mais', 'cc', 'COORD@@<nul>', '5'], ['le', 'det', 'NP@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['ne', 'adv', 'VN@@<nul>', '7'], ['rendra', 'v', '<nul>@@<nul>', '8'], ['-t', 'vinf', 'VN@@VPpart', '8'], ['-il', 'cls-suj', 'P+@@VPinf-OBJ', '9'], ['donc', 'adv', '<nul>@@<nul>', '10'], ['?', 'ponct', '<nul>@@<nul>', '10']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Non', 'adv', 'AP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0']], [[\"J'\", 'cls-suj', 'VN@@<nul>', '0'], ['ai', 'v', '<nul>@@<nul>', '1'], ['vu', 'vpp', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Sans', 'p', 'PP-MOD@@<nul>', '0'], ['doute', 'nc', 'NP@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Eh', 'npp', '<nul>@@<nul>', '0'], ['bien', 'adv', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0'], ['pardieu', 'nc', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['je', 'cls-suj', 'VN@@<nul>', '0'], ['ne', 'adv', '<nul>@@<nul>', '1'], ['veux', 'v', '<nul>@@<nul>', '1'], ['pas', 'adv', '<nul>@@VPinf-OBJ', '1'], [\"qu'\", 'adv', 'Ssub-OBJ@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['soit', 'vs', '<nul>@@<nul>', '3'], ['avec', 'p', 'PP-MOD@@Sint', '3'], ['lui', 'pro', 'NP@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['le', 'det', 'NP-SUJ@@<nul>', '0'], ['roi?', 'nc', '<nul>@@<nul>', '1'], ['pas', 'adv', 'AP@@<nul>', '1'], ['Aramis', 'adj', '<nul>@@<nul>', '2'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['est', 'v', '<nul>@@<nul>', '1'], ['évêque', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint-MOD', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '2'], [',', 'ponct', '<nul>@@<nul>', '3'], ['à', 'p', 'PP-MOD@@<nul>', '3'], ['tous', 'adj', 'NP@@<nul>', '4'], ['ses', 'det', '<nul>@@<nul>', '5'], ['membres', 'nc', '<nul>@@<nul>', '5'], ['nous', 'cls', 'VN@@<nul>', '4'], ['autres', 'adj', '<nul>@@<nul>', '5'], ['amis', 'vpp', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@Sint', '5'], ['et', 'cc', 'COORD@@<nul>', '3'], ['nous', 'cls-suj', 'VN@@<nul>', '4'], ['aurons', 'v', '<nul>@@<nul>', '5'], [\"l'\", 'det', 'NP-OBJ@@<nul>', '4'], ['honneur', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['nous', 'pro', 'NP@@<nul>', '6'], ['défendre', 'vinf', '<nul>@@<nul>', '7'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Que', 'prorel', '<nul>@@<nul>', '0'], ['diable', 'nc', '<nul>@@<nul>', '0'], ['ne', 'adv', 'VN@@<nul>', '0'], ['le', 'clo-obj', '<nul>@@<nul>', '1'], ['voulez', 'v', '<nul>@@<nul>', '1'], ['-vous', 'clo-a_obj', '<nul>@@VPinf-OBJ', '1'], ['pas', 'adv', '<nul>@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', 'NP-MOD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], [\"qu'\", 'cs', 'NP-MOD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@Srel', '2'], ['vous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['a', 'v', '<nul>@@<nul>', '3'], ['dit', 'vpp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mais', 'cc', 'COORD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], [\"d'\", 'p', 'PP-DE_OBJ@@<nul>', '1'], ['Artagnan', 'npp', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['ai', 'v', '<nul>@@<nul>', '2'], ['point', 'vpp', '<nul>@@<nul>', '2'], ['aperçu', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '2'], ['Mordaunt', 'npp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], [\"qu'\", 'cs', 'Ssub-OBJ@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['faut', 'v', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@Sint', '4'], ['repos', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['rejoindrait', 'v', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP-P_OBJ@@Sint', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['même', 'adj', '<nul>@@<nul>', '5'], ['champ', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['bataille', 'nc', 'NP@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Est', 'nc', 'NP-OBJ@@<nul>', '1'], ['-ce', 'det', 'NP@@<nul>', '2'], ['lui', 'pro', '<nul>@@<nul>', '3'], ['?', 'ponct', '<nul>@@<nul>', '1'], ['demanda', 'v', 'VN@@<nul>', '1'], ['Aramis', 'npp', 'NP-OBJ@@Sint-MOD', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mordaunt', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['dit', 'v', 'VN@@<nul>', '0'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['un', 'det', 'NP-OBJ@@Sint', '3'], ['homme', 'nc', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@<nul>', '4'], ['premier', 'adj', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '5'], ['un', 'det', 'NP@@<nul>', '5'], ['homme', 'nc', '<nul>@@<nul>', '6'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '6'], [\"s'\", 'clr', 'VN@@Srel', '7'], ['est', 'v', '<nul>@@<nul>', '8'], ['passé', 'vpp', '<nul>@@<nul>', '8'], ['.', 'ponct', '<nul>@@<nul>', '5']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '5'], ['est', 'v', '<nul>@@<nul>', '6'], ['toujours', 'adv', '<nul>@@VPpart', '6'], ['le', 'det', 'NP-OBJ@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '7'], ['le', 'det', 'NP@@<nul>', '8'], ['monde', 'nc', '<nul>@@<nul>', '9'], [',', 'ponct', '<nul>@@<nul>', '5'], ['mais', 'cc', 'COORD@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '6'], ['ne', 'adv', '<nul>@@<nul>', '7'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '7'], ['aurait', 'v', '<nul>@@<nul>', '7'], ['-il', 'pro', '<nul>@@<nul>', '7'], ['pas', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '4'], ['le', 'det', 'NP@@<nul>', '5'], ['monde', 'nc', '<nul>@@<nul>', '6'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['nous', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['avons', 'v', '<nul>@@<nul>', '1'], ['dit', 'vpp', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint', '1'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['les', 'det', 'NP-OBJ@@<nul>', '1'], ['avons', 'nc', '<nul>@@<nul>', '2'], ['vu', 'vpp', 'VPpart@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@<nul>', '0'], [\"m'\", 'clr', '<nul>@@<nul>', '1'], ['en', 'clo', '<nul>@@<nul>', '1'], ['doutais', 'v', '<nul>@@<nul>', '1'], ['.', 'ponct', '<nul>@@<nul>', '1']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Aramis', 'npp', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['venait', 'v', '<nul>@@<nul>', '3'], ['me', 'det', '<nul>@@<nul>', '2'], ['donner', 'vinf', 'VN@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@VPinf-OBJ', '3'], [\"l'\", 'det', 'NP@@<nul>', '4'], ['hospitalité', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['je', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['ignore', 'v', '<nul>@@<nul>', '1'], [';', 'ponct', '<nul>@@Sint-MOD', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], [\"s'\", 'clr', '<nul>@@<nul>', '2'], ['agit', 'v', '<nul>@@<nul>', '2'], [\"d'\", 'p', 'PP-DE_OBJ@@Sint-MOD', '2'], ['être', 'vinf', 'VN@@<nul>', '3'], ['élevé', 'vpp', '<nul>@@<nul>', '4'], [',', 'ponct', '<nul>@@VPinf', '4'], ['moi', 'adj', '<nul>@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['et', 'cc', 'COORD@@<nul>', '0'], ['Porthos', 'npp', 'NP@@<nul>', '1'], ['se', 'clr', 'VN@@<nul>', '1'], ['regardaient', 'v', '<nul>@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['politesse', 'nc', 'NP@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['étonnement', 'nc', 'NP@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '0'], [\"n'\", 'adv', 'VN@@Srel', '1'], ['avait', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], [\"d'\", 'det', 'PP-DE_OBJ@@<nul>', '1'], ['esprit', 'nc', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['et', 'cc', 'COORD@@<nul>', '1'], [\"l'\", 'det', 'NP-SUJ@@<nul>', '2'], ['avait', 'v', '<nul>@@<nul>', '3'], ['vu', 'vpp', 'VPpart@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['roi', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '3'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['était', 'v', '<nul>@@<nul>', '4'], ['comme', 'p', 'PP-MOD@@<nul>', '4'], ['ami', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP-DE_OBJ@@VPinf', '4'], ['son', 'det', 'NP@@<nul>', '5'], ['beau-frère', 'nc', '<nul>@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['debout', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['comme', 'cs', 'Ssub-MOD@@<nul>', '0'], ['le', 'det', 'VN@@<nul>', '1'], ['regardait', 'v', '<nul>@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@Sint', '2'], ['le', 'det', 'NP@@<nul>', '3'], ['côté', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP@@<nul>', '4'], ['lui', 'pro', 'NP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Aramis', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['à', 'p', '<nul>@@<nul>', '0'], ['son', 'det', 'NP@@<nul>', '0'], ['poste', 'nc', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['tombé', 'vpp', '<nul>@@<nul>', '1'], ['de', 'p', 'PP-DE_OBJ@@Sint-MOD', '1'], ['ses', 'det', 'NP@@<nul>', '2'], ['bras', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], [\"l'\", 'det', 'NP-MOD@@<nul>', '0'], ['attendait', 'nc', '<nul>@@<nul>', '1'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['une', 'det', 'NP@@<nul>', '2'], ['inquiétude', 'nc', '<nul>@@<nul>', '3'], ['singulière', 'adj', 'AP@@<nul>', '3'], [';', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['debout', 'adj', 'AP-ATS@@<nul>', '1'], ['plutôt', 'adv', '<nul>@@Sint-MOD', '1'], ['silencieusement', 'adv', '<nul>@@<nul>', '1'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['le', 'det', 'NP-SUJ@@<nul>', '2'], ['poignard', 'nc', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['front', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '3']]]\n" + "--\tponct\t<nul>@@<nul>\t0\n", + "Le\tdet\tNP@@<nul>\t0\n", + "parlement\tnc\t<nul>@@<nul>\t1\n", + "...\tponct\t<nul>@@SENT\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " SENT \n", + " ___________|____________ \n", + " ponct | | \n", + " ____|____ | | \n", + " | NP | </s>\n", + " | ____|______ | | \n", + " | det nc ponct </s>\n", + " | | | | | \n", + " -- Le parlement ... </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (SENT\n", + " (ponct -- (NP (det Le) (nc parlement)))\n", + " (ponct ...)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t0\n", + "Vous\tpro\tVN@@<nul>\t0\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "dit\tv\t<nul>@@<nul>\t1\n", + "Athos\tnpp\tNP-OBJ@@Sint-MOD\t1\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "je\tcls-suj\tVN@@<nul>\t1\n", + "ne\tadv\t<nul>@@<nul>\t2\n", + "me\tv\t<nul>@@<nul>\t2\n", + "reconnais\tv\t<nul>@@<nul>\t2\n", + "pas\tadv\t<nul>@@<nul>\t2\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " _________________________________|__________________________________________ \n", + " | | Sint-MOD | | \n", + " | | _________________|__________________ | | \n", + " | | VN NP-OBJ | VN | </s>\n", + " | | _____|____ | | _________|_____________ | | \n", + "<s> ponct pro ponct v npp ponct cls-suj adv v v adv ponct </s>\n", + " | | | | | | | | | | | | | | \n", + "<s> -- Vous , dit Athos , je ne me reconnais pas . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s> <s>)\n", + " (ponct --)\n", + " (Sint-MOD\n", + " (VN (pro Vous) (ponct ,) (v dit))\n", + " (NP-OBJ (npp Athos))\n", + " (ponct ,)\n", + " (VN (cls-suj je) (adv ne) (v me) (v reconnais) (adv pas)))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t1\n", + "M.\tnpp\tNP-SUJ@@<nul>\t1\n", + "d'\tp\tNPP+@@<nul>\t2\n", + "Artagnan\tnpp\t<nul>@@<nul>\t3\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ____|_________________________ \n", + " <s> | | \n", + " _________|____ | | \n", + " | | NP-SUJ | | \n", + " | | ____|_____ | | \n", + " | | | NPP+ | </s>\n", + " | | | _____|______ | | \n", + " | ponct npp p npp ponct </s>\n", + " | | | | | | | \n", + "<s> -- M. d' Artagnan . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (NP-SUJ (npp M.) (NPP+ (p d') (npp Artagnan))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 6\n", + "<s>\t<s>\t<s>\t1\n", + "Acté\tnc\tNP-OBJ@@<nul>\t1\n", + "?\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ____|___________ \n", + " <s> | | \n", + " ___|____ | | \n", + " | NP-OBJ | </s>\n", + " | | | | \n", + " | nc ponct </s>\n", + " | | | | \n", + "<s> Acté ? </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s> (<s> <s> (NP-OBJ (nc Acté))) (ponct ?) (</s> (</s> </s>))))\n", + "Tree depth: 5\n", + "<s>\t<s>\t<s>\t1\n", + "--\tponct\t<nul>@@<nul>\t1\n", + "Oui\tadj\tAP-ATS@@<nul>\t1\n", + ",\tponct\t<nul>@@<nul>\t1\n", + "répondit\tv\tVN@@<nul>\t1\n", + "le\tdet\tNP-OBJ@@Sint-MOD\t2\n", + "mousquetaire\tnc\t<nul>@@<nul>\t3\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\n", + " SENT \n", + " | \n", + " <s> \n", + " ______|_________________________________________ \n", + " <s> | | \n", + " _________________|_______________ | | \n", + " | | | | Sint-MOD | | \n", + " | | | | ________|_______ | | \n", + " | | AP-ATS | VN NP-OBJ | </s>\n", + " | | | | | _______|_________ | | \n", + " | ponct adj ponct v det nc ponct </s>\n", + " | | | | | | | | | \n", + "<s> -- Oui , répondit le mousquetaire . </s>\n", + "\n", + "NLTK TREE (SENT\n", + " (<s>\n", + " (<s>\n", + " <s>\n", + " (ponct --)\n", + " (AP-ATS (adj Oui))\n", + " (ponct ,)\n", + " (Sint-MOD\n", + " (VN (v répondit))\n", + " (NP-OBJ (det le) (nc mousquetaire))))\n", + " (ponct .)\n", + " (</s> (</s> </s>))))\n", + "Tree depth: 6\n" ] } ], @@ -1031,7 +1181,16 @@ "file_conll = \"sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500\"\n", "\n", "sents = readFile(file_conll)\n", - "print(sents)" + "exepts = []\n", + "for sent in sents[:5]:\n", + " sent_lst = ['\\t'.join(word)+'\\n' for word in sent]\n", + " sent_str = ''.join(sent_lst)\n", + " print(sent_str)\n", + " try:\n", + " visualize_const_prediction(sent_str)\n", + " except:\n", + " exepts.append(sent_str)\n", + "print(len(exepts))" ] }, { -- GitLab