add readfile

507a7200 · Tatiana BLADIER · d625d561 · 507a7200 · 507a7200 · 507a7200
Commit 507a7200 authored 2 months ago by Tatiana BLADIER
--- a/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb
+++ b/tania_scripts/.ipynb_checkpoints/tania-some-other-metrics-checkpoint.ipynb
@@ -898,10 +898,33 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 27,
   "id": "8d4ecba9-89b8-4000-a061-aa16aa68a404",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:295: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:303: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_bwd\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:315: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:322: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_bwd\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:335: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:342: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_bwd\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:352: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)\n",
+      "/home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:365: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
+      "  @torch.cuda.amp.custom_bwd\n"
+     ]
+    }
+   ],
   "source": [
    "from transform import *\n",
    "\n",
@@ -910,14 +933,14 @@
    "    tree = AttachJuxtaposeTree.totree(tokenlist, 'SENT')\n",
    "    AttachJuxtaposeTree.action2tree(tree, parsed).pretty_print()\n",
    "    nltk_tree = AttachJuxtaposeTree.action2tree(tree, parsed)\n",
-    "    #print(\"NLTK TREE\", nltk_tree)\n",
+    "    print(\"NLTK TREE\", nltk_tree)\n",
    "    depth = nltk_tree.height() - 1  # NLTK includes the leaf level as height 1, so subtract 1 for tree depth \n",
    "    print(\"Tree depth:\", depth)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 28,
   "id": "bfd3abf3-b83a-4817-85ad-654daf72be88",
   "metadata": {},
   "outputs": [
@@ -943,6 +966,17 @@
      " |    |    |    |    |     |                |    |   \n",
      "<s>   --   Eh bien? fit   -il               .   </s>\n",
      "\n",
+      "NLTK TREE (SENT\n",
+      "  (<s>\n",
+      "    (<s>\n",
+      "      <s>\n",
+      "      (ponct --)\n",
+      "      (npp Eh)\n",
+      "      (AP\n",
+      "        (adv bien?)\n",
+      "        (VPinf-OBJ\n",
+      "          (VN (v fit))\n",
+      "          (VN (cls-suj -il) (ponct .) (</s> (</s> </s>))))))))\n",
      "Tree depth: 8\n"
     ]
    }
@@ -953,10 +987,59 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 45,
   "id": "bc51ab44-6885-45cc-bad2-6a43a7791fdb",
   "metadata": {},
   "outputs": [],
+   "source": [
+    "def readFile(filepath):\n",
+    "    with open(filepath, \"r\") as inf:\n",
+    "        sentences = []\n",
+    "        current_sentence = []\n",
+    "        for line in inf:\n",
+    "            cols = line.strip().split('\\t')\n",
+    "\n",
+    "            if not cols or len(cols) != 4:\n",
+    "                continue\n",
+    "            token = cols[0]\n",
+    "        \n",
+    "            if token == '<s>':\n",
+    "                current_sentence = []\n",
+    "            elif token == '</s>':\n",
+    "                if current_sentence:\n",
+    "                    sentences.append(current_sentence)\n",
+    "            else:\n",
+    "                current_sentence.append(cols)\n",
+    "        return sentences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "673d6a76-42a0-4dcd-9c54-ae18710a032a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[['--', 'ponct', '<nul>@@<nul>', '0'], ['Le', 'det', 'NP@@<nul>', '0'], ['parlement', 'nc', '<nul>@@<nul>', '1'], ['...', 'ponct', '<nul>@@SENT', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Vous', 'pro', 'VN@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['me', 'v', '<nul>@@<nul>', '2'], ['reconnais', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['M.', 'npp', 'NP-SUJ@@<nul>', '1'], [\"d'\", 'p', 'NPP+@@<nul>', '2'], ['Artagnan', 'npp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Acté', 'nc', 'NP-OBJ@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Oui', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['répondit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '2'], ['mousquetaire', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Alors', 'adv', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '1'], ['roi', 'nc', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['faut', 'v', '<nul>@@<nul>', '2'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['vous', 'cls-suj', 'VN@@<nul>', '2'], ['ne', 'adv', '<nul>@@<nul>', '3'], ['le', 'clo-obj', '<nul>@@<nul>', '3'], ['rendiez', 'v', '<nul>@@<nul>', '3'], ['pas', 'adv', '<nul>@@Sint', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Non', 'adv', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['est', 'v', '<nul>@@<nul>', '2'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], [',', 'ponct', 
'<nul>@@<nul>', '2'], ['je', 'cls-suj', '<nul>@@<nul>', '2'], ['le', 'clo-obj', 'NP-OBJ@@<nul>', '2'], ['suis', 'v', '<nul>@@<nul>', '3'], [',', 'ponct', '<nul>@@<nul>', '2'], ['mais', 'cc', 'COORD@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], [\"m'\", 'clr', '<nul>@@<nul>', '4'], ['a', 'v', '<nul>@@<nul>', '4'], ['semblé', 'vpp', '<nul>@@<nul>', '4'], ['...', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1']], [['Il', 'cls-suj', 'VN@@Sint-MOD', '1'], ['a', 'v', '<nul>@@<nul>', '2'], ['répondu', 'vpp', '<nul>@@<nul>', '2'], [':', 'ponct', '<nul>@@<nul>', '0'], ['voilà', 'vinf', 'VN@@<nul>', '0'], ['tout', 'adv', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@Sint-MOD', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['puis', 'adv', '<nul>@@<nul>', '2'], ['dire', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['car', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['bien', 'adv', '<nul>@@<nul>', '2'], ['facile', 'adj', 'AP-ATS@@<nul>', '2'], ['à', 'p', 'PP@@<nul>', '2'], ['comprendre', 'vinf', 'VN@@<nul>', '3'], ['.', 'ponct', '<nul>@@VPinf', '4']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Mais', 'cc', 'COORD@@<nul>', '5'], ['le', 'det', 'NP@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['ne', 'adv', 'VN@@<nul>', '7'], ['rendra', 'v', '<nul>@@<nul>', '8'], ['-t', 'vinf', 'VN@@VPpart', '8'], ['-il', 'cls-suj', 'P+@@VPinf-OBJ', '9'], ['donc', 'adv', '<nul>@@<nul>', '10'], ['?', 'ponct', '<nul>@@<nul>', '10']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Non', 'adv', 'AP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0']], [[\"J'\", 'cls-suj', 'VN@@<nul>', '0'], ['ai', 'v', '<nul>@@<nul>', '1'], ['vu', 'vpp', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Sans', 'p', 'PP-MOD@@<nul>', '0'], ['doute', 
'nc', 'NP@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Eh', 'npp', '<nul>@@<nul>', '0'], ['bien', 'adv', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0'], ['pardieu', 'nc', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['je', 'cls-suj', 'VN@@<nul>', '0'], ['ne', 'adv', '<nul>@@<nul>', '1'], ['veux', 'v', '<nul>@@<nul>', '1'], ['pas', 'adv', '<nul>@@VPinf-OBJ', '1'], [\"qu'\", 'adv', 'Ssub-OBJ@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['soit', 'vs', '<nul>@@<nul>', '3'], ['avec', 'p', 'PP-MOD@@Sint', '3'], ['lui', 'pro', 'NP@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['le', 'det', 'NP-SUJ@@<nul>', '0'], ['roi?', 'nc', '<nul>@@<nul>', '1'], ['pas', 'adv', 'AP@@<nul>', '1'], ['Aramis', 'adj', '<nul>@@<nul>', '2'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['est', 'v', '<nul>@@<nul>', '1'], ['évêque', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint-MOD', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '2'], [',', 'ponct', '<nul>@@<nul>', '3'], ['à', 'p', 'PP-MOD@@<nul>', '3'], ['tous', 'adj', 'NP@@<nul>', '4'], ['ses', 'det', '<nul>@@<nul>', '5'], ['membres', 'nc', '<nul>@@<nul>', '5'], ['nous', 'cls', 'VN@@<nul>', '4'], ['autres', 'adj', '<nul>@@<nul>', '5'], ['amis', 'vpp', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@Sint', '5'], ['et', 'cc', 'COORD@@<nul>', '3'], ['nous', 'cls-suj', 'VN@@<nul>', '4'], ['aurons', 'v', '<nul>@@<nul>', '5'], [\"l'\", 'det', 
'NP-OBJ@@<nul>', '4'], ['honneur', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['nous', 'pro', 'NP@@<nul>', '6'], ['défendre', 'vinf', '<nul>@@<nul>', '7'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Que', 'prorel', '<nul>@@<nul>', '0'], ['diable', 'nc', '<nul>@@<nul>', '0'], ['ne', 'adv', 'VN@@<nul>', '0'], ['le', 'clo-obj', '<nul>@@<nul>', '1'], ['voulez', 'v', '<nul>@@<nul>', '1'], ['-vous', 'clo-a_obj', '<nul>@@VPinf-OBJ', '1'], ['pas', 'adv', '<nul>@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', 'NP-MOD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], [\"qu'\", 'cs', 'NP-MOD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@Srel', '2'], ['vous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['a', 'v', '<nul>@@<nul>', '3'], ['dit', 'vpp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mais', 'cc', 'COORD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], [\"d'\", 'p', 'PP-DE_OBJ@@<nul>', '1'], ['Artagnan', 'npp', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['ai', 'v', '<nul>@@<nul>', '2'], ['point', 'vpp', '<nul>@@<nul>', '2'], ['aperçu', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '2'], ['Mordaunt', 'npp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], [\"qu'\", 'cs', 'Ssub-OBJ@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['faut', 'v', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@Sint', '4'], ['repos', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', 
'2'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['rejoindrait', 'v', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP-P_OBJ@@Sint', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['même', 'adj', '<nul>@@<nul>', '5'], ['champ', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['bataille', 'nc', 'NP@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Est', 'nc', 'NP-OBJ@@<nul>', '1'], ['-ce', 'det', 'NP@@<nul>', '2'], ['lui', 'pro', '<nul>@@<nul>', '3'], ['?', 'ponct', '<nul>@@<nul>', '1'], ['demanda', 'v', 'VN@@<nul>', '1'], ['Aramis', 'npp', 'NP-OBJ@@Sint-MOD', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mordaunt', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['dit', 'v', 'VN@@<nul>', '0'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['un', 'det', 'NP-OBJ@@Sint', '3'], ['homme', 'nc', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@<nul>', '4'], ['premier', 'adj', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '5'], ['un', 'det', 'NP@@<nul>', '5'], ['homme', 'nc', '<nul>@@<nul>', '6'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '6'], [\"s'\", 'clr', 'VN@@Srel', '7'], ['est', 'v', '<nul>@@<nul>', '8'], ['passé', 'vpp', '<nul>@@<nul>', '8'], ['.', 'ponct', '<nul>@@<nul>', '5']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '5'], ['est', 'v', '<nul>@@<nul>', '6'], ['toujours', 'adv', '<nul>@@VPpart', '6'], ['le', 'det', 'NP-OBJ@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '7'], ['le', 'det', 'NP@@<nul>', '8'], ['monde', 'nc', '<nul>@@<nul>', '9'], [',', 'ponct', '<nul>@@<nul>', '5'], ['mais', 'cc', 'COORD@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '6'], ['ne', 'adv', '<nul>@@<nul>', '7'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '7'], 
['aurait', 'v', '<nul>@@<nul>', '7'], ['-il', 'pro', '<nul>@@<nul>', '7'], ['pas', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '4'], ['le', 'det', 'NP@@<nul>', '5'], ['monde', 'nc', '<nul>@@<nul>', '6'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['nous', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['avons', 'v', '<nul>@@<nul>', '1'], ['dit', 'vpp', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint', '1'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['les', 'det', 'NP-OBJ@@<nul>', '1'], ['avons', 'nc', '<nul>@@<nul>', '2'], ['vu', 'vpp', 'VPpart@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@<nul>', '0'], [\"m'\", 'clr', '<nul>@@<nul>', '1'], ['en', 'clo', '<nul>@@<nul>', '1'], ['doutais', 'v', '<nul>@@<nul>', '1'], ['.', 'ponct', '<nul>@@<nul>', '1']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Aramis', 'npp', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['venait', 'v', '<nul>@@<nul>', '3'], ['me', 'det', '<nul>@@<nul>', '2'], ['donner', 'vinf', 'VN@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@VPinf-OBJ', '3'], [\"l'\", 'det', 'NP@@<nul>', '4'], ['hospitalité', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', 
'<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['je', 'cls-suj', 'VN@@<nul>', '0'], [\"l'\", 'clo-obj', '<nul>@@<nul>', '1'], ['ignore', 'v', '<nul>@@<nul>', '1'], [';', 'ponct', '<nul>@@Sint-MOD', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], [\"s'\", 'clr', '<nul>@@<nul>', '2'], ['agit', 'v', '<nul>@@<nul>', '2'], [\"d'\", 'p', 'PP-DE_OBJ@@Sint-MOD', '2'], ['être', 'vinf', 'VN@@<nul>', '3'], ['élevé', 'vpp', '<nul>@@<nul>', '4'], [',', 'ponct', '<nul>@@VPinf', '4'], ['moi', 'adj', '<nul>@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['et', 'cc', 'COORD@@<nul>', '0'], ['Porthos', 'npp', 'NP@@<nul>', '1'], ['se', 'clr', 'VN@@<nul>', '1'], ['regardaient', 'v', '<nul>@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['politesse', 'nc', 'NP@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['étonnement', 'nc', 'NP@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '0'], [\"n'\", 'adv', 'VN@@Srel', '1'], ['avait', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], [\"d'\", 'det', 'PP-DE_OBJ@@<nul>', '1'], ['esprit', 'nc', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['et', 'cc', 'COORD@@<nul>', '1'], [\"l'\", 'det', 'NP-SUJ@@<nul>', '2'], ['avait', 'v', '<nul>@@<nul>', '3'], ['vu', 'vpp', 'VPpart@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['roi', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '3'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['était', 'v', '<nul>@@<nul>', '4'], ['comme', 'p', 'PP-MOD@@<nul>', '4'], ['ami', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP-DE_OBJ@@VPinf', '4'], ['son', 'det', 'NP@@<nul>', '5'], ['beau-frère', 'nc', '<nul>@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['debout', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['comme', 
'cs', 'Ssub-MOD@@<nul>', '0'], ['le', 'det', 'VN@@<nul>', '1'], ['regardait', 'v', '<nul>@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@Sint', '2'], ['le', 'det', 'NP@@<nul>', '3'], ['côté', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP@@<nul>', '4'], ['lui', 'pro', 'NP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Aramis', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['à', 'p', '<nul>@@<nul>', '0'], ['son', 'det', 'NP@@<nul>', '0'], ['poste', 'nc', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['tombé', 'vpp', '<nul>@@<nul>', '1'], ['de', 'p', 'PP-DE_OBJ@@Sint-MOD', '1'], ['ses', 'det', 'NP@@<nul>', '2'], ['bras', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], [\"l'\", 'det', 'NP-MOD@@<nul>', '0'], ['attendait', 'nc', '<nul>@@<nul>', '1'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['une', 'det', 'NP@@<nul>', '2'], ['inquiétude', 'nc', '<nul>@@<nul>', '3'], ['singulière', 'adj', 'AP@@<nul>', '3'], [';', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['debout', 'adj', 'AP-ATS@@<nul>', '1'], ['plutôt', 'adv', '<nul>@@Sint-MOD', '1'], ['silencieusement', 'adv', '<nul>@@<nul>', '1'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['le', 'det', 'NP-SUJ@@<nul>', '2'], ['poignard', 'nc', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['front', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '3']]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "file_conll = \"sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500\"\n",
+    "\n",
+    "sents = readFile(file_conll)\n",
+    "print(sents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "729d7671-02be-443b-94ee-ddc79284aac4",
+   "metadata": {},
+   "outputs": [],
   "source": []
  }
 ],

 %% Cell type:markdown id:96858183-3e82-4a33-ba0f-1b21b5f36018 tags:
 ## Type-token ratio
 %% Cell type:code id:510c3726-366d-4e26-a2bb-b55391b473bd tags:
 ``` python
 import re
 import nltk
 from nltk.tokenize import sent_tokenize, word_tokenize
 # Download once if not already
 #nltk.download('punkt')
 ```
 %% Cell type:code id:ee59c294-fdcd-429e-a126-734480d1b0ba tags:
 ``` python
 sample_text = """
 <s>	<s>	<s>	0
 Aramis	npp	<nul>@@<nul>	0
 était	v	<nul>@@<nul>	0
 à	p	<nul>@@<nul>	0
 son	det	NP@@<nul>	0
 poste	nc	<nul>@@<nul>	1
 ,	ponct	<nul>@@<nul>	0
 il	cls-suj	VN@@<nul>	0
 était	v	<nul>@@<nul>	1
 tombé	vpp	<nul>@@<nul>	1
 de	p	PP-DE_OBJ@@Sint-MOD	1
 ses	det	NP@@<nul>	2
 bras	nc	<nul>@@<nul>	3
 .	ponct	<nul>@@<nul>	0
 </s>	</s>	</s>	0
 """
 ```
 %% Cell type:code id:b6ae41ef-116f-473d-b3f3-115d90fe65b7 tags:
 ``` python
 import string
 def compute_ttr_cleaned(text, *, exclude=()):
    """
    Compute the type/token ratio (TTR) from column-formatted text.

    - Only the first tab-separated column of each line is used (tokens).
    - Tokens are stripped of surrounding whitespace and lowercased.
    - Tokens made up only of ASCII punctuation are ignored, including
      multi-character ones such as '--' or '...'.
    - Sentence-boundary markers such as '<s>' / '</s>' are counted like any
      other token unless they are listed in `exclude`.

    Parameters:
    - text: str, the input text in column format
    - exclude: iterable of str (keyword-only), tokens to leave out of the
      count; compared case-insensitively. Empty by default.

    Returns:
    - ttr: float, number of distinct tokens divided by number of tokens,
      or 0.0 when no token is left after filtering
    """
    skipped = {marker.lower() for marker in exclude}
    punctuation = set(string.punctuation)
    tokens = []
    for line in text.strip().splitlines():
        token = line.split('\t')[0].strip().lower()
        # Character-wise test: `token in string.punctuation` would be a
        # substring check and would let '--' or '...' through. all() is
        # vacuously true for an empty token, so blank lines are skipped too.
        if all(ch in punctuation for ch in token):
            continue
        if token in skipped:
            continue
        tokens.append(token)
    if not tokens:
        return 0.0
    return len(set(tokens)) / len(tokens)
 ```
 %% Cell type:code id:2a882cc9-8f9d-4457-becb-d2e26ab3f14f tags:
 ``` python
 # Compute the overall type/token ratio of sample_text and print it with three decimals.
 ttr = compute_ttr_cleaned(sample_text)
 print(f"Type/Token Ratio: {ttr:.3f}")
 ```
 %% Output
    Type/Token Ratio: 0.923
 %% Cell type:code id:8897dcc3-4218-4ee5-9984-17b9a6d8dce2 tags:
 ``` python
 def compute_ttr_by_pos(text):
    """
    Compute type/token ratios for verbs and nouns in column-formatted text.

    Each line is split on tabs: the first column is the token, the second
    its POS tag. Lines with fewer than two columns are skipped.
    - Verbs: POS in {'v', 'vpp', 'vpr'}
    - Nouns: POS in {'nc', 'npp'}
    - Tokens are lowercased before distinct types are counted.
    - Tokens made up only of ASCII punctuation (including multi-character
      ones such as '--' or '...') are ignored, whatever their POS tag.

    Parameters:
    - text: str, the input text in column format

    Returns:
    - dict with keys 'verb_ttr' and 'noun_ttr'; a ratio is 0.0 when no
      token of that class was found.
    """
    import string
    verb_pos = {'v', 'vpp', 'vpr'}
    noun_pos = {'nc', 'npp'}
    punctuation = set(string.punctuation)
    verb_tokens = []
    noun_tokens = []
    for line in text.strip().splitlines():
        parts = line.split('\t')
        if len(parts) < 2:
            continue
        token = parts[0].strip().lower()
        pos = parts[1].strip()
        # Character-wise test: `token in string.punctuation` would be a
        # substring check and would miss '--' or '...'. An empty token
        # passes all() vacuously and is skipped as well.
        if all(ch in punctuation for ch in token):
            continue
        if pos in verb_pos:
            verb_tokens.append(token)
        elif pos in noun_pos:
            noun_tokens.append(token)
    # Compute TTRs
    ttr_verb = len(set(verb_tokens)) / len(verb_tokens) if verb_tokens else 0.0
    ttr_noun = len(set(noun_tokens)) / len(noun_tokens) if noun_tokens else 0.0
    return {
        'verb_ttr': ttr_verb,
        'noun_ttr': ttr_noun,
    }
 ```
 %% Cell type:code id:1363f307-fa4b-43ba-93d5-2d1c11ceb9e4 tags:
 ``` python
 # Compute the verb and noun type/token ratios of sample_text and print each with three decimals.
 result = compute_ttr_by_pos(sample_text)
 print(f"Verb TTR: {result['verb_ttr']:.3f}")
 print(f"Noun TTR: {result['noun_ttr']:.3f}")
 ```
 %% Output
    Verb TTR: 0.667
    Noun TTR: 1.000
 %% Cell type:code id:1362e192-514a-4a77-a8cb-5c012026e2bb tags:
 ``` python
 def compute_nv_ratios(text):
    """
    Derive part-of-speech count ratios from column-formatted text.

    Only the second tab-separated column (the POS tag) of each non-blank
    line is inspected; lines with fewer than two columns are skipped.
    - Verbs: 'v', 'vpp', 'vpr'
    - Nouns: 'nc', 'npp'
    - Adjectives: 'adj'
    - Adverbs: 'adv'

    Returns:
    - Dictionary with 'nominal_verb_ratio' (nouns / verbs),
      'verb_nominal_ratio' (verbs / nouns), 'adv_verb_ratio'
      (adverbs / verbs) and 'adj_noun_verb_ratio'
      ((adjectives + nouns) / verbs). A ratio whose denominator count is
      zero is reported as float('inf').
    """
    category_of = {
        'v': 'verb', 'vpp': 'verb', 'vpr': 'verb',
        'nc': 'noun', 'npp': 'noun',
        'adj': 'adj',
        'adv': 'adv',
    }
    tally = dict.fromkeys(('verb', 'noun', 'adj', 'adv'), 0)
    for row in text.strip().splitlines():
        if not row.strip():
            continue
        fields = row.split('\t')
        if len(fields) < 2:
            continue
        category = category_of.get(fields[1])
        if category is not None:
            tally[category] += 1

    verbs = tally['verb']
    nouns = tally['noun']
    unbounded = float('inf')

    def per_verb(amount):
        # Every ratio that has the verb count as denominator shares this guard.
        return amount / verbs if verbs else unbounded

    return {
        'nominal_verb_ratio': per_verb(nouns),
        'verb_nominal_ratio': verbs / nouns if nouns else unbounded,
        'adv_verb_ratio': per_verb(tally['adv']),
        'adj_noun_verb_ratio': per_verb(tally['adj'] + nouns)
    }
 ```
 %% Cell type:code id:544ff6aa-4104-4580-a01f-97429ffcc228 tags:
 ``` python
 # Compute the POS count ratios of sample_text and print each with two decimals.
 ratios = compute_nv_ratios(sample_text)
 print(f"Nominal/Verb Ratio: {ratios['nominal_verb_ratio']:.2f}")
 print(f"Verb/Nominal Ratio: {ratios['verb_nominal_ratio']:.2f}")
 print(f"Adverb/Verb Ratio: {ratios['adv_verb_ratio']:.2f}")
 print(f"Adj+Noun/Verb Ratio: {ratios['adj_noun_verb_ratio']:.2f}")
 ```
 %% Output
    Nominal/Verb Ratio: 1.00
    Verb/Nominal Ratio: 1.00
    Adverb/Verb Ratio: 0.00
    Adj+Noun/Verb Ratio: 1.00
 %% Cell type:markdown id:d3a929bf-61cb-4ef8-bc00-6e2a59760d37 tags:
 ## Readability
 %% Cell type:markdown id:3fe25ff0-3f83-40fe-8420-08c09ffe98e6 tags:
 ### 📚 French Readability Metrics (TODO: verify this)
 This notebook implements and explains three common **readability formulas** tailored for **French texts**:
 ---
 #### 1. 🟦 **Flesch–Douma Index**
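 An adaptation of the original Flesch Reading Ease formula for French. (Attribution to verify: these coefficients are commonly credited to Kandel and Moles' French adaptation, whereas Douma's adaptation was made for Dutch.)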
 $\text{Flesch–Douma} = 207 - (1.015 \times \text{ASL}) - (73.6 \times \text{ASW})$
 Where:
 - **ASL** = Average Sentence Length = (number of words) / (number of sentences)
 - **ASW** = Average Syllables per Word = (number of syllables) / (number of words)
 📊 **Interpretation**:
 - 90–100: Very easy
 - 60–70: Standard
 - 30–50: Difficult
 - < 30: Very difficult
 ---
 #### 2. 🟨 **LIX Index**
 Used widely in French and other European languages. Measures sentence length and lexical complexity.
 $\text{LIX} = \frac{\text{number of words}}{\text{number of sentences}} + \frac{100 \times \text{number of long words (≥7 chars)}}{\text{number of words}}$
 📊 **Interpretation**:
 - $<$ 30: Easy
 - 30–40: Medium
 - $>$ 50: Difficult
 ---
 #### 3. 🟥 **Kandel–Moles Index**
 A linear formula proposed for French readability:
 $\text{Kandel–Moles} = 0.1935 \times \text{number of words} + 0.1672 \times \text{number of syllables} - 1.779$
 📊 **Interpretation**:
 - Higher values indicate more complex texts.
 ---
 These formulas help estimate how easily a French reader can understand a given passage. The metrics can be used to analyze textbooks, articles, instructional materials, etc.
 %% Cell type:code id:b9052dc2-ce45-4af4-a0a0-46c60a13da12 tags:
 ``` python
 # Rewriting the readability metric functions here, without relying on downloading external resources
 import re
 # Naive sentence splitter (based on punctuation)
 def naive_sentence_tokenize(text):
    """
    Split text into sentences on runs of '.', '!' or '?'.

    The terminators themselves are removed. Each fragment is stripped of
    surrounding whitespace and empty fragments are dropped, so empty or
    punctuation-only input yields an empty list rather than [''].
    This is purely punctuation-based: any '.' ends a sentence, including
    the one in an abbreviation such as 'M.'.

    Parameters:
    - text: str, running text

    Returns:
    - list of str, the non-empty sentence fragments in order
    """
    fragments = (piece.strip() for piece in re.split(r'[.!?]+', text))
    return [piece for piece in fragments if piece]
 # Naive word tokenizer (splits on whitespace and punctuation)
 def naive_word_tokenize(text):
    """
    Return the lowercased words of text, in order of appearance.

    A word is a maximal run of regex word characters (\\w), so punctuation,
    whitespace, apostrophes and hyphens all act as separators:
    "n'était" yields 'n' and 'était'.
    """
    lowered = text.lower()
    return [hit.group() for hit in re.finditer(r'\b\w+\b', lowered)]
 # Function to count syllables in a French word (naive method using vowel groups)
 def count_syllables(word):
    """
    Estimate the number of syllables in a French word.

    Naive method: every maximal run of consecutive vowels (accented vowels
    and 'œ' included) counts as one syllable. The word is lowercased first
    so that capitalised or upper-case input is counted like lower-case
    input. Silent letters are not accounted for.

    Parameters:
    - word: str, a single word

    Returns:
    - int, the number of vowel groups, never less than 1 (also for an
      empty or vowel-less string)
    """
    vowels = "aeiouyàâäéèêëîïôöùûüœ"
    vowel_groups = re.findall(rf"[{vowels}]+", word.lower())
    return max(1, len(vowel_groups))
 # Function to compute Flesch-Douma, LIX, and Kandel-Moles indices
 def compute_french_readability(text):
    """
    Compute three readability scores for a text.

    Sentences come from naive_sentence_tokenize (blank fragments ignored),
    words from naive_word_tokenize and syllables from count_syllables.
    A long word is one with at least 7 characters.

    NOTE(review): the formula coefficients below are reproduced as found
    and are flagged as unverified in the notebook's accompanying notes.

    Parameters:
    - text: str, running text

    Returns:
    - dict with keys "Flesch-Douma", "LIX" and "Kandel-Moles", each rounded
      to two decimals; all three are 0.0 when the text has no sentence or
      no word.
    """
    sentence_count = sum(1 for chunk in naive_sentence_tokenize(text) if chunk.strip())
    tokens = [tok for tok in naive_word_tokenize(text) if re.match(r"\w+", tok)]
    word_count = len(tokens)

    # Guard clause: both counts are used as denominators below.
    if sentence_count == 0 or word_count == 0:
        return {
            "Flesch-Douma": 0.0,
            "LIX": 0.0,
            "Kandel-Moles": 0.0
        }

    syllable_count = sum(map(count_syllables, tokens))
    long_word_count = len([tok for tok in tokens if len(tok) >= 7])

    words_per_sentence = word_count / sentence_count
    syllables_per_word = syllable_count / word_count

    scores = {
        "Flesch-Douma": 207 - (1.015 * words_per_sentence) - (73.6 * syllables_per_word),
        "LIX": words_per_sentence + (100 * long_word_count / word_count),
        # Uses absolute word and syllable totals, so it grows with text length.
        "Kandel-Moles": 0.1935 * word_count + 0.1672 * syllable_count - 1.779,
    }
    return {label: round(value, 2) for label, value in scores.items()}
 ```
 %% Cell type:code id:1e9dd0fb-db6a-47d1-8bfb-1015845f6d3e tags:
 ``` python
 # Test on a sample French text
 sample_french_text = """
 Aramis était à son poste. Il était tombé de ses bras. Ce n'était pas un accident.
 """
 compute_french_readability(sample_french_text)
 ```
 %% Output
    {'Flesch-Douma': 88.68, 'LIX': 11.55, 'Kandel-Moles': 5.86}
 %% Cell type:markdown id:8a0c0fff-d605-4349-a698-a11fd404e2e8 tags:
 ## Calculate avg scores
 %% Cell type:code id:24bc84a5-b2df-4194-838a-8f24302599bd tags:
 ``` python
 # Define the function to compute average word length and sentence length
 def compute_avg_lengths(sample_text):
    """
    Compute the average word length and average sentence length of
    column-formatted text.

    Only the first tab-separated column (the token) of each line is used.
    A '<s>' token opens a sentence and a '</s>' token closes it; tokens seen
    before a '<s>' are discarded when that '<s>' is reached, and tokens
    that are never followed by a '</s>' are not counted. Within a sentence
    only tokens that start with a word character are counted as words.

    NOTE(review): because the word filter anchors at the start of the
    token, tokens such as '-il' are left out as well - confirm this is
    intended.

    Parameters:
    - sample_text: str, the input text in column format

    Returns:
    - dict with "Average Word Length" (characters per word) and
      "Average Sentence Length" (words per sentence), both rounded to two
      decimals and 0.0 when there is nothing to average.
    """
    sentences = []
    current_sentence = []
    for line in sample_text.strip().split('\n'):
        token = line.strip().split('\t')[0]
        if not token:
            continue  # blank line
        if token == '<s>':
            current_sentence = []
        elif token == '</s>':
            if current_sentence:
                sentences.append(current_sentence)
            # Start a fresh list: without this, tokens following '</s>'
            # would be appended to the sentence that was just stored, and a
            # second '</s>' would store that same list again.
            current_sentence = []
        else:
            current_sentence.append(token)

    total_words = 0
    total_word_length = 0
    sentence_lengths = []
    for sentence in sentences:
        words = [w for w in sentence if re.match(r'\w+', w)]
        sentence_lengths.append(len(words))
        total_words += len(words)
        total_word_length += sum(len(w) for w in words)

    avg_word_length = total_word_length / total_words if total_words else 0.0
    avg_sentence_length = sum(sentence_lengths) / len(sentence_lengths) if sentence_lengths else 0.0
    return {
        "Average Word Length": round(avg_word_length, 2),
        "Average Sentence Length": round(avg_sentence_length, 2)
    }
 ```
 %% Cell type:code id:0cdb972f-31b6-4e7e-82a8-371eda344f2c tags:
 ``` python
 # Sample text from the user
 sample_text = """
 <s>	<s>	<s>	0
 Aramis	npp	<nul>@@<nul>	0
 était	v	<nul>@@<nul>	0
 à	p	<nul>@@<nul>	0
 son	det	NP@@<nul>	0
 poste	nc	<nul>@@<nul>	1
 ,	ponct	<nul>@@<nul>	0
 il	cls-suj	VN@@<nul>	0
 était	v	<nul>@@<nul>	1
 tombé	vpp	<nul>@@<nul>	1
 de	p	PP-DE_OBJ@@Sint-MOD	1
 ses	det	NP@@<nul>	2
 bras	nc	<nul>@@<nul>	3
 .	ponct	<nul>@@<nul>	0
 </s>	</s>	</s>	0
 <s>	<s>	<s>	0
 Aramis	npp	<nul>@@<nul>	0
 était	v	<nul>@@<nul>	0
 à	p	<nul>@@<nul>	0
 </s>	</s>	</s>	0
 """
 # Compute and display the results
 compute_avg_lengths(sample_text)
 ```
 %% Output
    {'Average Word Length': 3.79, 'Average Sentence Length': 7.0}
 %% Cell type:markdown id:bf5b0b52-e5c4-4b40-b925-495f4dd8e3be tags:
 ## Calculate POS frequencies
 %% Cell type:code id:56af520c-d56b-404a-aebf-ad7c2a9ca503 tags:
 ``` python
 def compute_pos_frequency(column_text):
    """Compute the relative frequency of verbs, nouns, adverbs and adjectives.

    Each line of ``column_text`` is tab-separated; the first column is the
    token and the second its POS tag. Lines with fewer than two columns and
    tokens that do not start with a word character (punctuation, ``<s>``,
    ``</s>``) are ignored.

    Args:
        column_text: Token-per-line text with tab-separated columns.

    Returns:
        A dict with the keys 'verb_freq', 'noun_freq', 'adv_freq' and
        'adj_freq', each rounded to four decimals. When no word token is
        found every frequency is 0.0, so the result can always be indexed
        by key.
    """
    tag_groups = {
        'verb_freq': {"v", "vpp", "vpr"},
        'noun_freq': {'nc', 'npp'},
        'adv_freq': {'adv'},
        'adj_freq': {'adj'},
    }
    counts = dict.fromkeys(tag_groups, 0)
    total_tokens = 0
    for line in column_text.strip().split('\n'):
        parts = line.strip().split('\t')
        if len(parts) < 2:
            continue
        token, pos = parts[0], parts[1]
        if not re.match(r'\w+', token):  # ignore punctuation
            continue
        total_tokens += 1
        for key, tags in tag_groups.items():
            if pos in tags:
                counts[key] += 1
    if total_tokens == 0:
        # Return the same shape as the normal case instead of a bare float.
        return dict.fromkeys(tag_groups, 0.0)
    return {key: round(count / total_tokens, 4) for key, count in counts.items()}
 ```
 %% Cell type:code id:f7c8b125-4651-4b21-bcc4-93ef78a4239b tags:
 ``` python
 # Report the POS frequencies of the sample text; each label is paired
 # with the matching key of the result dict.
 freqs = compute_pos_frequency(sample_text)
 print(f"Verb Frequency: {freqs['verb_freq']:.2f}")
 print(f"Noun Frequency: {freqs['noun_freq']:.2f}")
 print(f"Adj Frequency: {freqs['adj_freq']:.2f}")
 print(f"Adv Frequency: {freqs['adv_freq']:.2f}")
 ```
 %% Output
    Verb Frequency: 0.29
    Noun Frequency: 0.29
    Adj Frequency: 0.00
    Adv Frequency: 0.00
 %% Cell type:markdown id:4cd15f8f-5618-4586-bd43-30f4919c7274 tags:
 ### MSTTR-100 (Mean Segmental Type-Token Ratio)
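 MSTTR-100 measures lexical diversity by dividing the text into consecutive segments of 100 tokens and computing the type-token ratio (TTR) for each segment. The final MSTTR-100 is the average TTR across all full segments; a trailing segment shorter than 100 tokens is discarded, and a text shorter than one segment is scored with the plain TTR of the whole text.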
 %% Cell type:code id:daa17c33-adca-4695-90eb-741579382939 tags:
 ``` python
 import re
 def msttr(text, segment_size):
    """Return the mean segmental type-token ratio (MSTTR) of ``text``.

    The lower-cased text is split into words, cut into consecutive
    segments of ``segment_size`` words, and the type-token ratio of every
    full segment is averaged. A trailing partial segment is ignored. A text
    shorter than one segment is scored with the plain type-token ratio of
    the whole text.

    Args:
        text: The text to analyse.
        segment_size: Number of words per segment; must be positive.

    Returns:
        The MSTTR as a float, or 0.0 when the text contains no words.

    Raises:
        ValueError: If ``segment_size`` is not positive.
    """
    if segment_size <= 0:
        raise ValueError("segment_size must be a positive integer")
    words = re.findall(r'\b\w+\b', text.lower())
    if not words:
        # No tokens at all: avoid dividing by zero.
        return 0.0
    if len(words) < segment_size:
        return len(set(words)) / len(words)
    # Stop before the trailing partial segment so only full segments are built.
    last_start = len(words) - segment_size
    ttrs = [
        len(set(words[start:start + segment_size])) / segment_size
        for start in range(0, last_start + 1, segment_size)
    ]
    return sum(ttrs) / len(ttrs)
 ```
 %% Cell type:code id:80d8fa08-6b7d-4ab7-85cd-987823639277 tags:
 ``` python
 print("MSTTR: ", msttr(sample_french_text, 100))
 ```
 %% Output
    MSTTR:  0.8823529411764706
 %% Cell type:markdown id:91c7969a-3fff-4935-9f26-7e1ebb6b64c6 tags:
 ### BZIP TXT
 "BZIP TXT" refers to the compression ratio achieved by compressing the text using the BZIP2 algorithm. It serves as a proxy for the text's redundancy and complexity.
 %% Cell type:code id:c8bd9186-eab8-4ca6-93bd-82b260cd3d19 tags:
 ``` python
 import bz2
 def bzip_compression_ratio(text):
    """Return the bzip2 compression ratio (compressed size / original size).

    Sizes are measured in bytes of the UTF-8 encoded text. Very short
    inputs can yield a ratio above 1 because the compressed stream
    carries fixed overhead.

    Args:
        text: The text to compress.

    Returns:
        The ratio as a float, or 0.0 for empty text (the ratio is
        undefined when there are no bytes to compress).
    """
    raw = text.encode('utf-8')
    if not raw:
        # Nothing to compress: avoid dividing by zero.
        return 0.0
    return len(bz2.compress(raw)) / len(raw)
 ```
 %% Cell type:code id:3f9c7dc7-6820-4013-a85c-2af4f846d4f5 tags:
 ``` python
 print("BZIP: ", bzip_compression_ratio(sample_french_text))
 ```
 %% Output
    BZIP:  1.1931818181818181
 %% Cell type:markdown id:88b6f5f8-90b7-4dfe-b8ee-d54380bf3194 tags:
 ### Word Entropy
 Word entropy quantifies the unpredictability or information content of words in a text. It's calculated using Shannon's entropy formula over the distribution of word frequencies.
 %% Cell type:code id:65e1a630-c46e-4b18-9831-b97864de53ee tags:
 ``` python
 import math
 from collections import Counter
 def word_entropy(text):
    """Return the Shannon entropy (in bits) of the word distribution of ``text``.

    Words are extracted from the lower-cased text; the result is 0 when
    the text contains no words.
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    n_tokens = len(tokens)
    # Relative frequency of every distinct word.
    probabilities = [freq / n_tokens for freq in Counter(tokens).values()]
    weighted_logs = (p * math.log2(p) for p in probabilities)
    return -sum(weighted_logs)
 ```
 %% Cell type:code id:1612e911-12a8-47c9-b811-b2d6885c3647 tags:
 ``` python
 print("WORD ENTROPY: ", word_entropy(sample_french_text))
 ```
 %% Output
    WORD ENTROPY:  3.807763576417195
 %% Cell type:markdown id:a58d962f-5d90-4ee9-b347-64f5bb52c24a tags:
 ### Bigram Entropy
 Bigram entropy measures the unpredictability of word pairs (bigrams) in a text, providing insight into the text's syntactic complexity.
 %% Cell type:code id:925a3a75-aaaa-4851-b77b-b42cb1e21e11 tags:
 ``` python
 def bigram_entropy(text):
    """Return the Shannon entropy (in bits) of the word-bigram distribution.

    Bigrams are pairs of adjacent words of the lower-cased text; the
    result is 0 when the text has fewer than two words.
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    pair_counts = Counter(zip(tokens, tokens[1:]))
    n_pairs = sum(pair_counts.values())
    # Relative frequency of every distinct bigram.
    probabilities = [freq / n_pairs for freq in pair_counts.values()]
    return -sum(p * math.log2(p) for p in probabilities)
 ```
 %% Cell type:code id:6fa60897-ad26-43b4-b8de-861290ca6bd3 tags:
 ``` python
 print("BIGRAM ENTROPY: ", bigram_entropy(sample_french_text))
 ```
 %% Output
    BIGRAM ENTROPY:  4.0
 %% Cell type:markdown id:6ac26902-75a5-4824-8c2b-da3f733c820f tags:
 ## Spearman Correlation between perplexity and stylometric features
 %% Cell type:code id:f3678462-e572-4ce5-8d3d-a5389b2356c8 tags:
 ``` python
 #!pip3 install seaborn
 #!pip3 install scipy
 ```
 %% Cell type:code id:b621b2a8-488f-44db-b085-fe156f453943 tags:
 ``` python
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 from scipy.stats import spearmanr
 # Sample data (to be replaced with real values)
 # One list per feature; position i of every list belongs to the same text.
 data = {
    "perplexity": [32.5, 45.2, 28.1, 39.0, 50.3],
    "avg_word_length": [4.1, 4.3, 4.0, 4.2, 4.5],
    "avg_sentence_length": [12.5, 13.0, 11.0, 12.0, 13.5],
    "word_entropy": [6.1, 6.3, 6.0, 6.2, 6.4],
    "bigram_entropy": [8.0, 8.2, 7.9, 8.1, 8.3]
 }
 df = pd.DataFrame(data)
 # Compute Spearman correlation
 # The second return value of spearmanr (the p-values) is discarded.
 corr, _ = spearmanr(df)
 # Label the rows and columns of the correlation result with the feature names.
 corr_df = pd.DataFrame(corr, index=df.columns, columns=df.columns)
 # Plot heatmap
 plt.figure(figsize=(8, 6))
 sns.heatmap(corr_df, annot=True, cmap="coolwarm", fmt=".2f", square=True, linewidths=0.5)
 plt.title("Spearman Correlation Heatmap")
 plt.tight_layout()
 plt.show()
 ```
 %% Output
 %% Cell type:markdown id:45ee04fc-acab-4bba-ba06-e4cf4bca9fe5 tags:
 ## Tree depth
 %% Cell type:code id:79f99787-c220-4f1d-93a9-59230363ec3f tags:
 ``` python
 def parse_sentence_block(text):
    """Convert a tab-separated sentence block into action tuples and tokens.

    Only lines with exactly four tab-separated fields
    (token, POS tag, label, parent index) are used. A label of the form
    ``A@@B`` is split into its two parts; a label without ``@@`` is kept
    as the first part and paired with ``'<nul>'``.

    Returns:
        A pair ``(actions, tokens)`` where ``actions`` is a list of
        ``(parent index as int, first label part, second label part, POS tag)``
        tuples and ``tokens`` is the list of tokens in the same order.
    """
    actions, tokens = [], []
    for raw_line in text.strip().split('\n'):
        fields = tuple(raw_line.strip().split('\t'))
        if len(fields) != 4:
            # Skip anything that is not a complete four-column row.
            continue
        token, postag, label, parent = fields
        parentidx = int(parent)
        if '@@' in label:
            pieces = label.split('@@')
            nonterm1, nonterm2 = pieces[0], pieces[1]
        else:
            nonterm1, nonterm2 = label, '<nul>'
        actions.append((parentidx, nonterm1, nonterm2, postag))
        tokens.append(token)
    return actions, tokens
 ```
 %% Cell type:code id:f567efb0-8b0b-4782-9345-052cf1785776 tags:
 ``` python
 example_sentence = """
 <s>	<s>	<s>	1
 --	ponct	<nul>@@<nul>	1
 Eh	npp	<nul>@@<nul>	1
 bien?	adv	AP@@<nul>	1
 fit	v	VN@@<nul>	2
 -il	cls-suj	VN@@VPinf-OBJ	3
 .	ponct	<nul>@@<nul>	4
 </s>	</s>	</s>	4
 """
 ```
 %% Cell type:code id:8d4ecba9-89b8-4000-a061-aa16aa68a404 tags:
 ``` python
 from transform import *
 def visualize_const_prediction(example_sent):
    """Print the constituency tree built from a sentence block, and its depth.

    Args:
        example_sent: Tab-separated sentence text in the format accepted
            by ``parse_sentence_block``.

    The function prints its results and has no return statement.
    """
    # Split the raw block into per-token action tuples and the token list.
    parsed, tokenlist = parse_sentence_block(example_sent)
    # NOTE(review): presumably builds the initial tree under a 'SENT' root; confirm in transform.
    tree = AttachJuxtaposeTree.totree(tokenlist, 'SENT')
    AttachJuxtaposeTree.action2tree(tree, parsed).pretty_print()
    # NOTE(review): action2tree is called a second time with the same arguments;
    # confirm it does not modify `tree`, otherwise reuse the first result.
    nltk_tree = AttachJuxtaposeTree.action2tree(tree, parsed)
-    #print("NLTK TREE", nltk_tree)
+    print("NLTK TREE", nltk_tree)
    depth = nltk_tree.height() - 1  # NLTK includes the leaf level as height 1, so subtract 1 for tree depth
    print("Tree depth:", depth)
 ```
+%% Output
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:295: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:303: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_bwd
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:315: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:322: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_bwd
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:335: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:342: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_bwd
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:352: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_fwd(cast_inputs=torch.float)
+    /home/tatiana.bladier/compo-text-eval/tania_scripts/supar/structs/fn.py:365: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.
+      @torch.cuda.amp.custom_bwd
 %% Cell type:code id:bfd3abf3-b83a-4817-85ad-654daf72be88 tags:
 ``` python
 visualize_const_prediction(example_sentence)
 ```
 %% Output
                   SENT
                    |
                   <s>
                    |
                   <s>
      ______________|__________
     |    |    |               AP
     |    |    |     __________|________
     |    |    |    |               VPinf-OBJ
     |    |    |    |     ______________|_______
     |    |    |    |    |                      VN
     |    |    |    |    |      ________________|____
     |    |    |    |    VN    |                |   </s>
     |    |    |    |    |     |                |    |
     |  ponct npp  adv   v  cls-suj           ponct </s>
     |    |    |    |    |     |                |    |
    <s>   --   Eh bien? fit   -il               .   </s>
+    NLTK TREE (SENT
+      (<s>
+        (<s>
+          <s>
+          (ponct --)
+          (npp Eh)
+          (AP
+            (adv bien?)
+            (VPinf-OBJ
+              (VN (v fit))
+              (VN (cls-suj -il) (ponct .) (</s> (</s> </s>))))))))
    Tree depth: 8
 %% Cell type:code id:bc51ab44-6885-45cc-bad2-6a43a7791fdb tags:
 ``` python
+def readFile(filepath):
+    """Read a tab-separated token file and group its rows into sentences.
+
+    Lines that do not split into exactly four tab-separated columns are
+    skipped. A row whose first column is ``<s>`` starts a new sentence, a
+    row whose first column is ``</s>`` stores the current sentence if it is
+    non-empty, and every other row is appended to the current sentence.
+
+    Args:
+        filepath: Path of the file to read.
+
+    Returns:
+        A list of sentences; each sentence is a list of rows and each row
+        is the list of its four column strings.
+    """
+    # NOTE(review): no explicit encoding is passed to open(), so the platform
+    # default is used; confirm the files are always read under a UTF-8 locale.
+    with open(filepath, "r") as inf:
+        sentences = []
+        current_sentence = []
+        for line in inf:
+            cols = line.strip().split('\t')
+            # Skip anything that is not a complete four-column row.
+            if not cols or len(cols) != 4:
+                continue
+            token = cols[0]
+            if token == '<s>':
+                current_sentence = []
+            elif token == '</s>':
+                # Sentences without any token row are not stored.
+                if current_sentence:
+                    sentences.append(current_sentence)
+            else:
+                current_sentence.append(cols)
+        return sentences
+```
+%% Cell type:code id:673d6a76-42a0-4dcd-9c54-ae18710a032a tags:
+``` python
+file_conll = "sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500"
+sents = readFile(file_conll)
+print(sents)
+```
+%% Output
+    [[['--', 'ponct', '<nul>@@<nul>', '0'], ['Le', 'det', 'NP@@<nul>', '0'], ['parlement', 'nc', '<nul>@@<nul>', '1'], ['...', 'ponct', '<nul>@@SENT', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Vous', 'pro', 'VN@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['me', 'v', '<nul>@@<nul>', '2'], ['reconnais', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['M.', 'npp', 'NP-SUJ@@<nul>', '1'], ["d'", 'p', 'NPP+@@<nul>', '2'], ['Artagnan', 'npp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Acté', 'nc', 'NP-OBJ@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Oui', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['répondit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '2'], ['mousquetaire', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Alors', 'adv', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['le', 'det', 'NP-OBJ@@Sint-MOD', '1'], ['roi', 'nc', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['faut', 'v', '<nul>@@<nul>', '2'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['vous', 'cls-suj', 'VN@@<nul>', '2'], ['ne', 'adv', '<nul>@@<nul>', '3'], ['le', 'clo-obj', '<nul>@@<nul>', '3'], ['rendiez', 'v', '<nul>@@<nul>', '3'], ['pas', 'adv', '<nul>@@Sint', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Non', 'adv', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ['est', 'v', '<nul>@@<nul>', '2'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], [',', 'ponct', 
'<nul>@@<nul>', '2'], ['je', 'cls-suj', '<nul>@@<nul>', '2'], ['le', 'clo-obj', 'NP-OBJ@@<nul>', '2'], ['suis', 'v', '<nul>@@<nul>', '3'], [',', 'ponct', '<nul>@@<nul>', '2'], ['mais', 'cc', 'COORD@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ["m'", 'clr', '<nul>@@<nul>', '4'], ['a', 'v', '<nul>@@<nul>', '4'], ['semblé', 'vpp', '<nul>@@<nul>', '4'], ['...', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '1']], [['Il', 'cls-suj', 'VN@@Sint-MOD', '1'], ['a', 'v', '<nul>@@<nul>', '2'], ['répondu', 'vpp', '<nul>@@<nul>', '2'], [':', 'ponct', '<nul>@@<nul>', '0'], ['voilà', 'vinf', 'VN@@<nul>', '0'], ['tout', 'adv', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@Sint-MOD', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['puis', 'adv', '<nul>@@<nul>', '2'], ['dire', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['car', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['bien', 'adv', '<nul>@@<nul>', '2'], ['facile', 'adj', 'AP-ATS@@<nul>', '2'], ['à', 'p', 'PP@@<nul>', '2'], ['comprendre', 'vinf', 'VN@@<nul>', '3'], ['.', 'ponct', '<nul>@@VPinf', '4']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Mais', 'cc', 'COORD@@<nul>', '5'], ['le', 'det', 'NP@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['ne', 'adv', 'VN@@<nul>', '7'], ['rendra', 'v', '<nul>@@<nul>', '8'], ['-t', 'vinf', 'VN@@VPpart', '8'], ['-il', 'cls-suj', 'P+@@VPinf-OBJ', '9'], ['donc', 'adv', '<nul>@@<nul>', '10'], ['?', 'ponct', '<nul>@@<nul>', '10']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['Non', 'adv', 'AP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0']], [["J'", 'cls-suj', 'VN@@<nul>', '0'], ['ai', 'v', '<nul>@@<nul>', '1'], ['vu', 'vpp', '<nul>@@<nul>', '1'], ['Athos', 'npp', 'NP-OBJ@@Sint-MOD', '1'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Sans', 'p', 'PP-MOD@@<nul>', '0'], ['doute', 'nc', 
'NP@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['vrai', 'adj', 'AP-ATS@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Eh', 'npp', '<nul>@@<nul>', '0'], ['bien', 'adv', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0'], ['pardieu', 'nc', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['je', 'cls-suj', 'VN@@<nul>', '0'], ['ne', 'adv', '<nul>@@<nul>', '1'], ['veux', 'v', '<nul>@@<nul>', '1'], ['pas', 'adv', '<nul>@@VPinf-OBJ', '1'], ["qu'", 'adv', 'Ssub-OBJ@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['soit', 'vs', '<nul>@@<nul>', '3'], ['avec', 'p', 'PP-MOD@@Sint', '3'], ['lui', 'pro', 'NP@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['le', 'det', 'NP-SUJ@@<nul>', '0'], ['roi?', 'nc', '<nul>@@<nul>', '1'], ['pas', 'adv', 'AP@@<nul>', '1'], ['Aramis', 'adj', '<nul>@@<nul>', '2'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['est', 'v', '<nul>@@<nul>', '1'], ['évêque', 'adj', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint-MOD', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '2'], [',', 'ponct', '<nul>@@<nul>', '3'], ['à', 'p', 'PP-MOD@@<nul>', '3'], ['tous', 'adj', 'NP@@<nul>', '4'], ['ses', 'det', '<nul>@@<nul>', '5'], ['membres', 'nc', '<nul>@@<nul>', '5'], ['nous', 'cls', 'VN@@<nul>', '4'], ['autres', 'adj', '<nul>@@<nul>', '5'], ['amis', 'vpp', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@Sint', '5'], ['et', 'cc', 'COORD@@<nul>', '3'], ['nous', 'cls-suj', 'VN@@<nul>', '4'], ['aurons', 'v', '<nul>@@<nul>', '5'], ["l'", 'det', 'NP-OBJ@@<nul>', 
'4'], ['honneur', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['nous', 'pro', 'NP@@<nul>', '6'], ['défendre', 'vinf', '<nul>@@<nul>', '7'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Que', 'prorel', '<nul>@@<nul>', '0'], ['diable', 'nc', '<nul>@@<nul>', '0'], ['ne', 'adv', 'VN@@<nul>', '0'], ['le', 'clo-obj', '<nul>@@<nul>', '1'], ['voulez', 'v', '<nul>@@<nul>', '1'], ['-vous', 'clo-a_obj', '<nul>@@VPinf-OBJ', '1'], ['pas', 'adv', '<nul>@@<nul>', '1'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', 'NP-MOD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ["qu'", 'cs', 'NP-MOD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@Srel', '2'], ['vous', 'clo-a_obj', '<nul>@@<nul>', '3'], ['a', 'v', '<nul>@@<nul>', '3'], ['dit', 'vpp', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mais', 'cc', 'COORD@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '1'], ['dit', 'v', 'VN@@<nul>', '1'], ["d'", 'p', 'PP-DE_OBJ@@<nul>', '1'], ['Artagnan', 'npp', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ["l'", 'clo-obj', '<nul>@@<nul>', '2'], ['ai', 'v', '<nul>@@<nul>', '2'], ['point', 'vpp', '<nul>@@<nul>', '2'], ['aperçu', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['ne', 'adv', '<nul>@@<nul>', '2'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '2'], ['Mordaunt', 'npp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '2'], ["qu'", 'cs', 'Ssub-OBJ@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['faut', 'v', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@Sint', '4'], ['repos', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['nous', 'clo-a_obj', 
'<nul>@@<nul>', '3'], ['rejoindrait', 'v', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP-P_OBJ@@Sint', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['même', 'adj', '<nul>@@<nul>', '5'], ['champ', 'nc', '<nul>@@<nul>', '5'], ['de', 'p', 'PP@@<nul>', '5'], ['bataille', 'nc', 'NP@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '2']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Est', 'nc', 'NP-OBJ@@<nul>', '1'], ['-ce', 'det', 'NP@@<nul>', '2'], ['lui', 'pro', '<nul>@@<nul>', '3'], ['?', 'ponct', '<nul>@@<nul>', '1'], ['demanda', 'v', 'VN@@<nul>', '1'], ['Aramis', 'npp', 'NP-OBJ@@Sint-MOD', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Mordaunt', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['dit', 'v', 'VN@@<nul>', '0'], ['Athos', 'npp', 'NP-SUJ@@Sint-MOD', '1'], [',', 'ponct', '<nul>@@<nul>', '2'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['est', 'v', '<nul>@@<nul>', '3'], ['un', 'det', 'NP-OBJ@@Sint', '3'], ['homme', 'nc', '<nul>@@<nul>', '4'], ['le', 'det', 'NP-OBJ@@<nul>', '4'], ['premier', 'adj', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '5'], ['un', 'det', 'NP@@<nul>', '5'], ['homme', 'nc', '<nul>@@<nul>', '6'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '6'], ["s'", 'clr', 'VN@@Srel', '7'], ['est', 'v', '<nul>@@<nul>', '8'], ['passé', 'vpp', '<nul>@@<nul>', '8'], ['.', 'ponct', '<nul>@@<nul>', '5']], [['--', 'ponct', '<nul>@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '5'], ['est', 'v', '<nul>@@<nul>', '6'], ['toujours', 'adv', '<nul>@@VPpart', '6'], ['le', 'det', 'NP-OBJ@@<nul>', '6'], ['moins', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '7'], ['le', 'det', 'NP@@<nul>', '8'], ['monde', 'nc', '<nul>@@<nul>', '9'], [',', 'ponct', '<nul>@@<nul>', '5'], ['mais', 'cc', 'COORD@@<nul>', '5'], ['il', 'cls-suj', 'VN@@<nul>', '6'], ['ne', 'adv', '<nul>@@<nul>', '7'], ['nous', 'clo-a_obj', '<nul>@@<nul>', '7'], ['aurait', 'v', 
'<nul>@@<nul>', '7'], ['-il', 'pro', '<nul>@@<nul>', '7'], ['pas', 'adv', '<nul>@@<nul>', '7'], ['de', 'p', 'PP@@<nul>', '4'], ['le', 'det', 'NP@@<nul>', '5'], ['monde', 'nc', '<nul>@@<nul>', '6'], ['?', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['nous', 'cls-suj', 'VN@@<nul>', '0'], ["l'", 'clo-obj', '<nul>@@<nul>', '1'], ['avons', 'v', '<nul>@@<nul>', '1'], ['dit', 'vpp', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@Sint', '1'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], ["l'", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@Sint-MOD', '2'], ['nous', 'cls-suj', 'VN@@<nul>', '1'], ["l'", 'clo-obj', '<nul>@@<nul>', '2'], ['avons', 'v', '<nul>@@<nul>', '2'], ['dit', 'vpp', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['les', 'det', 'NP-OBJ@@<nul>', '1'], ['avons', 'nc', '<nul>@@<nul>', '2'], ['vu', 'vpp', 'VPpart@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Aramis', 'npp', '<nul>@@<nul>', '0'], ['!', 'ponct', '<nul>@@<nul>', '0']], [['Je', 'cls-suj', 'VN@@<nul>', '0'], ["m'", 'clr', '<nul>@@<nul>', '1'], ['en', 'clo', '<nul>@@<nul>', '1'], ['doutais', 'v', '<nul>@@<nul>', '1'], ['.', 'ponct', '<nul>@@<nul>', '1']], [['--', 'ponct', '<nul>@@<nul>', '1'], ['Aramis', 'npp', 'AP-ATS@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '1'], ['je', 'cls-suj', 'VN@@<nul>', '1'], ['le', 'clo-obj', '<nul>@@<nul>', '2'], ['sais', 'v', '<nul>@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['mais', 'cc', 'COORD@@<nul>', '1'], ['il', 'cls-suj', 'VN@@<nul>', '2'], ['venait', 'v', '<nul>@@<nul>', '3'], ['me', 'det', '<nul>@@<nul>', '2'], ['donner', 'vinf', 'VN@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@VPinf-OBJ', '3'], ["l'", 'det', 'NP@@<nul>', '4'], ['hospitalité', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['--', 'ponct', '<nul>@@<nul>', '0'], ['Oui', 'adj', '<nul>@@<nul>', '0'], [',', 
'ponct', '<nul>@@<nul>', '0'], ['je', 'cls-suj', 'VN@@<nul>', '0'], ["l'", 'clo-obj', '<nul>@@<nul>', '1'], ['ignore', 'v', '<nul>@@<nul>', '1'], [';', 'ponct', '<nul>@@Sint-MOD', '1'], ['il', 'cls-suj', 'VN@@<nul>', '1'], ["s'", 'clr', '<nul>@@<nul>', '2'], ['agit', 'v', '<nul>@@<nul>', '2'], ["d'", 'p', 'PP-DE_OBJ@@Sint-MOD', '2'], ['être', 'vinf', 'VN@@<nul>', '3'], ['élevé', 'vpp', '<nul>@@<nul>', '4'], [',', 'ponct', '<nul>@@VPinf', '4'], ['moi', 'adj', '<nul>@@<nul>', '4'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['et', 'cc', 'COORD@@<nul>', '0'], ['Porthos', 'npp', 'NP@@<nul>', '1'], ['se', 'clr', 'VN@@<nul>', '1'], ['regardaient', 'v', '<nul>@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['politesse', 'nc', 'NP@@<nul>', '2'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['étonnement', 'nc', 'NP@@<nul>', '2'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Porthos', 'npp', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['qui', 'prorel', 'NP-SUJ@@<nul>', '0'], ["n'", 'adv', 'VN@@Srel', '1'], ['avait', 'v', '<nul>@@<nul>', '2'], ['pas', 'adv', '<nul>@@<nul>', '2'], ["d'", 'det', 'PP-DE_OBJ@@<nul>', '1'], ['esprit', 'nc', 'NP@@<nul>', '2'], [',', 'ponct', '<nul>@@<nul>', '1'], ['et', 'cc', 'COORD@@<nul>', '1'], ["l'", 'det', 'NP-SUJ@@<nul>', '2'], ['avait', 'v', '<nul>@@<nul>', '3'], ['vu', 'vpp', 'VPpart@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['roi', 'nc', '<nul>@@<nul>', '5'], [',', 'ponct', '<nul>@@<nul>', '3'], ['il', 'cls-suj', 'VN@@<nul>', '3'], ['était', 'v', '<nul>@@<nul>', '4'], ['comme', 'p', 'PP-MOD@@<nul>', '4'], ['ami', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP-DE_OBJ@@VPinf', '4'], ['son', 'det', 'NP@@<nul>', '5'], ['beau-frère', 'nc', '<nul>@@<nul>', '6'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['debout', 'adj', '<nul>@@<nul>', '0'], [',', 'ponct', '<nul>@@<nul>', '0'], ['comme', 'cs', 'Ssub-MOD@@<nul>', '0'], ['le', 
'det', 'VN@@<nul>', '1'], ['regardait', 'v', '<nul>@@<nul>', '2'], ['de', 'p', 'PP-DE_OBJ@@Sint', '2'], ['le', 'det', 'NP@@<nul>', '3'], ['côté', 'nc', '<nul>@@<nul>', '4'], ['de', 'p', 'PP@@<nul>', '4'], ['lui', 'pro', 'NP@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Aramis', 'npp', '<nul>@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '0'], ['à', 'p', '<nul>@@<nul>', '0'], ['son', 'det', 'NP@@<nul>', '0'], ['poste', 'nc', '<nul>@@<nul>', '1'], [',', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['tombé', 'vpp', '<nul>@@<nul>', '1'], ['de', 'p', 'PP-DE_OBJ@@Sint-MOD', '1'], ['ses', 'det', 'NP@@<nul>', '2'], ['bras', 'nc', '<nul>@@<nul>', '3'], ['.', 'ponct', '<nul>@@<nul>', '0']], [['Athos', 'npp', '<nul>@@<nul>', '0'], ["l'", 'det', 'NP-MOD@@<nul>', '0'], ['attendait', 'nc', '<nul>@@<nul>', '1'], ['avec', 'p', 'PP-MOD@@<nul>', '1'], ['une', 'det', 'NP@@<nul>', '2'], ['inquiétude', 'nc', '<nul>@@<nul>', '3'], ['singulière', 'adj', 'AP@@<nul>', '3'], [';', 'ponct', '<nul>@@<nul>', '0'], ['il', 'cls-suj', 'VN@@<nul>', '0'], ['était', 'v', '<nul>@@<nul>', '1'], ['debout', 'adj', 'AP-ATS@@<nul>', '1'], ['plutôt', 'adv', '<nul>@@Sint-MOD', '1'], ['silencieusement', 'adv', '<nul>@@<nul>', '1'], ['que', 'cs', 'Ssub-OBJ@@<nul>', '1'], ['le', 'det', 'NP-SUJ@@<nul>', '2'], ['poignard', 'nc', '<nul>@@<nul>', '3'], ['sur', 'p', 'PP@@<nul>', '3'], ['le', 'det', 'NP@@<nul>', '4'], ['front', 'nc', '<nul>@@<nul>', '5'], ['.', 'ponct', '<nul>@@<nul>', '3']]]
+%% Cell type:code id:729d7671-02be-443b-94ee-ddc79284aac4 tags:
+``` python
 ```

--- a/tania_scripts/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500
+++ b/tania_scripts/sp_generated_temp_0.7_all_data_73m_tok_pos_macro_positional_const-epoch-8-train_loss-4.3-val_loss-4.9.pt_500
+<s> ||| END OF PROMPT: ||| 
+[['<s>', '<s>', '<s>', '0']]
+--	ponct	<nul>@@<nul>	0
+Le	det	NP@@<nul>	0
+parlement	nc	<nul>@@<nul>	1
+...	ponct	<nul>@@SENT	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	0
+Vous	pro	VN@@<nul>	0
+,	ponct	<nul>@@<nul>	1
+dit	v	<nul>@@<nul>	1
+Athos	npp	NP-OBJ@@Sint-MOD	1
+,	ponct	<nul>@@<nul>	1
+je	cls-suj	VN@@<nul>	1
+ne	adv	<nul>@@<nul>	2
+me	v	<nul>@@<nul>	2
+reconnais	v	<nul>@@<nul>	2
+pas	adv	<nul>@@<nul>	2
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+M.	npp	NP-SUJ@@<nul>	1
+d'	p	NPP+@@<nul>	2
+Artagnan	npp	<nul>@@<nul>	3
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+Acté	nc	NP-OBJ@@<nul>	1
+?	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+Oui	adj	AP-ATS@@<nul>	1
+,	ponct	<nul>@@<nul>	1
+répondit	v	VN@@<nul>	1
+le	det	NP-OBJ@@Sint-MOD	2
+mousquetaire	nc	<nul>@@<nul>	3
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+Alors	adv	<nul>@@<nul>	1
+,	ponct	<nul>@@<nul>	1
+dit	v	VN@@<nul>	1
+le	det	NP-OBJ@@Sint-MOD	1
+roi	nc	<nul>@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+il	cls-suj	VN@@<nul>	1
+faut	v	<nul>@@<nul>	2
+que	cs	Ssub-OBJ@@<nul>	1
+vous	cls-suj	VN@@<nul>	2
+ne	adv	<nul>@@<nul>	3
+le	clo-obj	<nul>@@<nul>	3
+rendiez	v	<nul>@@<nul>	3
+pas	adv	<nul>@@Sint	3
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+Non	adv	AP-ATS@@<nul>	1
+,	ponct	<nul>@@<nul>	1
+il	cls-suj	VN@@<nul>	1
+est	v	<nul>@@<nul>	2
+vrai	adj	AP-ATS@@<nul>	2
+,	ponct	<nul>@@<nul>	2
+je	cls-suj	<nul>@@<nul>	2
+le	clo-obj	NP-OBJ@@<nul>	2
+suis	v	<nul>@@<nul>	3
+,	ponct	<nul>@@<nul>	2
+mais	cc	COORD@@<nul>	2
+il	cls-suj	VN@@<nul>	3
+m'	clr	<nul>@@<nul>	4
+a	v	<nul>@@<nul>	4
+semblé	vpp	<nul>@@<nul>	4
+...	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+</s>	</s>	</s>	1
+<s>	<s>	<s>	1
+Il	cls-suj	VN@@Sint-MOD	1
+a	v	<nul>@@<nul>	2
+répondu	vpp	<nul>@@<nul>	2
+:	ponct	<nul>@@<nul>	0
+voilà	vinf	VN@@<nul>	0
+tout	adv	NP-OBJ@@Sint-MOD	1
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	1
+Je	cls-suj	VN@@Sint-MOD	1
+ne	adv	<nul>@@<nul>	2
+puis	adv	<nul>@@<nul>	2
+dire	v	<nul>@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+car	cc	COORD@@<nul>	1
+il	cls-suj	VN@@<nul>	2
+est	v	<nul>@@<nul>	3
+bien	adv	<nul>@@<nul>	2
+facile	adj	AP-ATS@@<nul>	2
+à	p	PP@@<nul>	2
+comprendre	vinf	VN@@<nul>	3
+.	ponct	<nul>@@VPinf	4
+</s>	</s>	</s>	4
+<s>	<s>	<s>	5
+--	ponct	<nul>@@<nul>	5
+Mais	cc	COORD@@<nul>	5
+le	det	NP@@<nul>	6
+moins	adv	<nul>@@<nul>	7
+ne	adv	VN@@<nul>	7
+rendra	v	<nul>@@<nul>	8
+-t	vinf	VN@@VPpart	8
+-il	cls-suj	P+@@VPinf-OBJ	9
+donc	adv	<nul>@@<nul>	10
+?	ponct	<nul>@@<nul>	10
+</s>	</s>	</s>	10
+<s>	<s>	<s>	10
+--	ponct	<nul>@@<nul>	5
+Non	adv	AP@@<nul>	5
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+J'	cls-suj	VN@@<nul>	0
+ai	v	<nul>@@<nul>	1
+vu	vpp	<nul>@@<nul>	1
+Athos	npp	NP-OBJ@@Sint-MOD	1
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Sans	p	PP-MOD@@<nul>	0
+doute	nc	NP@@<nul>	1
+,	ponct	<nul>@@<nul>	1
+mais	cc	COORD@@<nul>	1
+il	cls-suj	VN@@<nul>	2
+est	v	<nul>@@<nul>	3
+vrai	adj	AP-ATS@@<nul>	2
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Eh	npp	<nul>@@<nul>	0
+bien	adv	<nul>@@<nul>	0
+!	ponct	<nul>@@<nul>	0
+pardieu	nc	<nul>@@<nul>	0
+!	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+je	cls-suj	VN@@<nul>	0
+ne	adv	<nul>@@<nul>	1
+veux	v	<nul>@@<nul>	1
+pas	adv	<nul>@@VPinf-OBJ	1
+qu'	adv	Ssub-OBJ@@<nul>	1
+il	cls-suj	VN@@<nul>	2
+soit	vs	<nul>@@<nul>	3
+avec	p	PP-MOD@@Sint	3
+lui	pro	NP@@<nul>	4
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+le	det	NP-SUJ@@<nul>	0
+roi?	nc	<nul>@@<nul>	1
+pas	adv	AP@@<nul>	1
+Aramis	adj	<nul>@@<nul>	2
+?	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Oui	adj	<nul>@@<nul>	0
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Oui	adj	<nul>@@<nul>	0
+,	ponct	<nul>@@<nul>	0
+il	cls-suj	VN@@<nul>	0
+est	v	<nul>@@<nul>	1
+évêque	adj	AP-ATS@@<nul>	1
+,	ponct	<nul>@@Sint-MOD	1
+dit	v	VN@@<nul>	1
+Athos	npp	NP-SUJ@@Sint-MOD	2
+,	ponct	<nul>@@<nul>	3
+à	p	PP-MOD@@<nul>	3
+tous	adj	NP@@<nul>	4
+ses	det	<nul>@@<nul>	5
+membres	nc	<nul>@@<nul>	5
+nous	cls	VN@@<nul>	4
+autres	adj	<nul>@@<nul>	5
+amis	vpp	<nul>@@<nul>	5
+,	ponct	<nul>@@Sint	5
+et	cc	COORD@@<nul>	3
+nous	cls-suj	VN@@<nul>	4
+aurons	v	<nul>@@<nul>	5
+l'	det	NP-OBJ@@<nul>	4
+honneur	nc	<nul>@@<nul>	5
+de	p	PP@@<nul>	5
+nous	pro	NP@@<nul>	6
+défendre	vinf	<nul>@@<nul>	7
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Que	prorel	<nul>@@<nul>	0
+diable	nc	<nul>@@<nul>	0
+ne	adv	VN@@<nul>	0
+le	clo-obj	<nul>@@<nul>	1
+voulez	v	<nul>@@<nul>	1
+-vous	clo-a_obj	<nul>@@VPinf-OBJ	1
+pas	adv	<nul>@@<nul>	1
+?	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Aramis	npp	NP-MOD@@<nul>	0
+,	ponct	<nul>@@<nul>	1
+qu'	cs	NP-MOD@@<nul>	1
+il	cls-suj	VN@@Srel	2
+vous	clo-a_obj	<nul>@@<nul>	3
+a	v	<nul>@@<nul>	3
+dit	vpp	<nul>@@<nul>	3
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Mais	cc	COORD@@<nul>	0
+,	ponct	<nul>@@<nul>	1
+dit	v	VN@@<nul>	1
+d'	p	PP-DE_OBJ@@<nul>	1
+Artagnan	npp	NP@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+je	cls-suj	VN@@<nul>	1
+ne	adv	<nul>@@<nul>	2
+l'	clo-obj	<nul>@@<nul>	2
+ai	v	<nul>@@<nul>	2
+point	vpp	<nul>@@<nul>	2
+aperçu	vpp	<nul>@@<nul>	2
+,	ponct	<nul>@@Sint-MOD	2
+je	cls-suj	VN@@<nul>	1
+ne	adv	<nul>@@<nul>	2
+le	clo-obj	<nul>@@<nul>	2
+sais	v	<nul>@@<nul>	2
+.	ponct	<nul>@@<nul>	2
+</s>	</s>	</s>	2
+<s>	<s>	<s>	2
+--	ponct	<nul>@@<nul>	2
+Mordaunt	npp	<nul>@@<nul>	2
+,	ponct	<nul>@@<nul>	2
+qu'	cs	Ssub-OBJ@@<nul>	2
+il	cls-suj	VN@@<nul>	3
+faut	v	<nul>@@<nul>	4
+le	det	NP-OBJ@@Sint	4
+repos	nc	<nul>@@<nul>	5
+,	ponct	<nul>@@<nul>	2
+il	cls-suj	VN@@<nul>	2
+nous	clo-a_obj	<nul>@@<nul>	3
+rejoindrait	v	<nul>@@<nul>	3
+sur	p	PP-P_OBJ@@Sint	3
+le	det	NP@@<nul>	4
+même	adj	<nul>@@<nul>	5
+champ	nc	<nul>@@<nul>	5
+de	p	PP@@<nul>	5
+bataille	nc	NP@@<nul>	6
+.	ponct	<nul>@@<nul>	2
+</s>	</s>	</s>	2
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+Est	nc	NP-OBJ@@<nul>	1
+-ce	det	NP@@<nul>	2
+lui	pro	<nul>@@<nul>	3
+?	ponct	<nul>@@<nul>	1
+demanda	v	VN@@<nul>	1
+Aramis	npp	NP-OBJ@@Sint-MOD	2
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Porthos	npp	<nul>@@<nul>	0
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Mordaunt	npp	<nul>@@<nul>	0
+,	ponct	<nul>@@<nul>	0
+dit	v	VN@@<nul>	0
+Athos	npp	NP-SUJ@@Sint-MOD	1
+,	ponct	<nul>@@<nul>	2
+il	cls-suj	VN@@<nul>	2
+est	v	<nul>@@<nul>	3
+un	det	NP-OBJ@@Sint	3
+homme	nc	<nul>@@<nul>	4
+le	det	NP-OBJ@@<nul>	4
+premier	adj	<nul>@@<nul>	5
+,	ponct	<nul>@@<nul>	5
+un	det	NP@@<nul>	5
+homme	nc	<nul>@@<nul>	6
+qui	prorel	NP-SUJ@@<nul>	6
+s'	clr	VN@@Srel	7
+est	v	<nul>@@<nul>	8
+passé	vpp	<nul>@@<nul>	8
+.	ponct	<nul>@@<nul>	5
+</s>	</s>	</s>	5
+<s>	<s>	<s>	6
+--	ponct	<nul>@@<nul>	5
+il	cls-suj	VN@@<nul>	5
+est	v	<nul>@@<nul>	6
+toujours	adv	<nul>@@VPpart	6
+le	det	NP-OBJ@@<nul>	6
+moins	adv	<nul>@@<nul>	7
+de	p	PP@@<nul>	7
+le	det	NP@@<nul>	8
+monde	nc	<nul>@@<nul>	9
+,	ponct	<nul>@@<nul>	5
+mais	cc	COORD@@<nul>	5
+il	cls-suj	VN@@<nul>	6
+ne	adv	<nul>@@<nul>	7
+nous	clo-a_obj	<nul>@@<nul>	7
+aurait	v	<nul>@@<nul>	7
+-il	pro	<nul>@@<nul>	7
+pas	adv	<nul>@@<nul>	7
+de	p	PP@@<nul>	4
+le	det	NP@@<nul>	5
+monde	nc	<nul>@@<nul>	6
+?	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+nous	cls-suj	VN@@<nul>	0
+l'	clo-obj	<nul>@@<nul>	1
+avons	v	<nul>@@<nul>	1
+dit	vpp	<nul>@@<nul>	1
+,	ponct	<nul>@@Sint	1
+nous	cls-suj	VN@@<nul>	1
+l'	clo-obj	<nul>@@<nul>	2
+avons	v	<nul>@@<nul>	2
+dit	vpp	<nul>@@<nul>	2
+,	ponct	<nul>@@Sint-MOD	2
+nous	cls-suj	VN@@<nul>	1
+l'	clo-obj	<nul>@@<nul>	2
+avons	v	<nul>@@<nul>	2
+dit	vpp	<nul>@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+les	det	NP-OBJ@@<nul>	1
+avons	nc	<nul>@@<nul>	2
+vu	vpp	VPpart@@<nul>	2
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Aramis	npp	<nul>@@<nul>	0
+!	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Je	cls-suj	VN@@<nul>	0
+m'	clr	<nul>@@<nul>	1
+en	clo	<nul>@@<nul>	1
+doutais	v	<nul>@@<nul>	1
+.	ponct	<nul>@@<nul>	1
+</s>	</s>	</s>	1
+<s>	<s>	<s>	1
+--	ponct	<nul>@@<nul>	1
+Aramis	npp	AP-ATS@@<nul>	1
+,	ponct	<nul>@@<nul>	1
+je	cls-suj	VN@@<nul>	1
+le	clo-obj	<nul>@@<nul>	2
+sais	v	<nul>@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+mais	cc	COORD@@<nul>	1
+il	cls-suj	VN@@<nul>	2
+venait	v	<nul>@@<nul>	3
+me	det	<nul>@@<nul>	2
+donner	vinf	VN@@<nul>	2
+de	p	PP-DE_OBJ@@VPinf-OBJ	3
+l'	det	NP@@<nul>	4
+hospitalité	nc	<nul>@@<nul>	5
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+--	ponct	<nul>@@<nul>	0
+Oui	adj	<nul>@@<nul>	0
+,	ponct	<nul>@@<nul>	0
+je	cls-suj	VN@@<nul>	0
+l'	clo-obj	<nul>@@<nul>	1
+ignore	v	<nul>@@<nul>	1
+;	ponct	<nul>@@Sint-MOD	1
+il	cls-suj	VN@@<nul>	1
+s'	clr	<nul>@@<nul>	2
+agit	v	<nul>@@<nul>	2
+d'	p	PP-DE_OBJ@@Sint-MOD	2
+être	vinf	VN@@<nul>	3
+élevé	vpp	<nul>@@<nul>	4
+,	ponct	<nul>@@VPinf	4
+moi	adj	<nul>@@<nul>	4
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Athos	npp	<nul>@@<nul>	0
+et	cc	COORD@@<nul>	0
+Porthos	npp	NP@@<nul>	1
+se	clr	VN@@<nul>	1
+regardaient	v	<nul>@@<nul>	2
+avec	p	PP-MOD@@<nul>	1
+politesse	nc	NP@@<nul>	2
+avec	p	PP-MOD@@<nul>	1
+étonnement	nc	NP@@<nul>	2
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Porthos	npp	<nul>@@<nul>	0
+,	ponct	<nul>@@<nul>	0
+qui	prorel	NP-SUJ@@<nul>	0
+n'	adv	VN@@Srel	1
+avait	v	<nul>@@<nul>	2
+pas	adv	<nul>@@<nul>	2
+d'	det	PP-DE_OBJ@@<nul>	1
+esprit	nc	NP@@<nul>	2
+,	ponct	<nul>@@<nul>	1
+et	cc	COORD@@<nul>	1
+l'	det	NP-SUJ@@<nul>	2
+avait	v	<nul>@@<nul>	3
+vu	vpp	VPpart@@<nul>	3
+le	det	NP@@<nul>	4
+roi	nc	<nul>@@<nul>	5
+,	ponct	<nul>@@<nul>	3
+il	cls-suj	VN@@<nul>	3
+était	v	<nul>@@<nul>	4
+comme	p	PP-MOD@@<nul>	4
+ami	nc	<nul>@@<nul>	4
+de	p	PP-DE_OBJ@@VPinf	4
+son	det	NP@@<nul>	5
+beau-frère	nc	<nul>@@<nul>	6
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Athos	npp	<nul>@@<nul>	0
+était	v	<nul>@@<nul>	0
+debout	adj	<nul>@@<nul>	0
+,	ponct	<nul>@@<nul>	0
+comme	cs	Ssub-MOD@@<nul>	0
+le	det	VN@@<nul>	1
+regardait	v	<nul>@@<nul>	2
+de	p	PP-DE_OBJ@@Sint	2
+le	det	NP@@<nul>	3
+côté	nc	<nul>@@<nul>	4
+de	p	PP@@<nul>	4
+lui	pro	NP@@<nul>	5
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Aramis	npp	<nul>@@<nul>	0
+était	v	<nul>@@<nul>	0
+à	p	<nul>@@<nul>	0
+son	det	NP@@<nul>	0
+poste	nc	<nul>@@<nul>	1
+,	ponct	<nul>@@<nul>	0
+il	cls-suj	VN@@<nul>	0
+était	v	<nul>@@<nul>	1
+tombé	vpp	<nul>@@<nul>	1
+de	p	PP-DE_OBJ@@Sint-MOD	1
+ses	det	NP@@<nul>	2
+bras	nc	<nul>@@<nul>	3
+.	ponct	<nul>@@<nul>	0
+</s>	</s>	</s>	0
+<s>	<s>	<s>	0
+Athos	npp	<nul>@@<nul>	0
+l'	det	NP-MOD@@<nul>	0
+attendait	nc	<nul>@@<nul>	1
+avec	p	PP-MOD@@<nul>	1
+une	det	NP@@<nul>	2
+inquiétude	nc	<nul>@@<nul>	3
+singulière	adj	AP@@<nul>	3
+;	ponct	<nul>@@<nul>	0
+il	cls-suj	VN@@<nul>	0
+était	v	<nul>@@<nul>	1
+debout	adj	AP-ATS@@<nul>	1
+plutôt	adv	<nul>@@Sint-MOD	1
+silencieusement	adv	<nul>@@<nul>	1
+que	cs	Ssub-OBJ@@<nul>	1
+le	det	NP-SUJ@@<nul>	2
+poignard	nc	<nul>@@<nul>	3
+sur	p	PP@@<nul>	3
+le	det	NP@@<nul>	4
+front	nc	<nul>@@<nul>	5
+.	ponct	<nul>@@<nul>	3
+</s>	</s>	</s>	3
+<s>	<s>	<s>	4
+Athos	npp	<nul>@@<nul>	4
+l'	det	NP@@<nul>	4
+attendait	v	<nul>@@<nul>	5
+avec	p	PP@@<nul>	4
+impatience	nc	NP@@<nul>	5
+,	ponct	<nul>@@Sint	3
+il	cls-suj	VN@@<nul>	3
+fut	v	<nul>@@<nul>	4
+saisi	vpp	<nul>@@<nul>	4
+d'	p	PP-DE_OBJ@@<nul>	3
+un	det	NP@@<nul>	4
+silence	nc	<nul>@@<nul>	5
+.	ponct	<nul>@@<nul>	0
+<s> -- Le parlement ... </s> <s> -- Vous , dit Athos , je ne me reconnais pas . </s> <s> -- M. d' Artagnan . </s> <s> Acté ? </s> <s> -- Oui , répondit le mousquetaire . </s> <s> -- Alors , dit le roi , il faut que vous ne le rendiez pas . </s> <s> -- Non , il est vrai , je le suis , mais il m' a semblé ... </s> <s> -- </s> <s> Il a répondu : voilà tout . </s> <s> Je ne puis dire , car il est bien facile à comprendre . </s> <s> -- Mais le moins ne rendra -t -il donc ? </s> <s> -- Non . </s> <s> -- </s> <s> J' ai vu Athos . </s> <s> -- Sans doute , mais il est vrai . </s> <s> -- Eh bien ! pardieu ! </s> <s> je ne veux pas qu' il soit avec lui . </s> <s> -- le roi? pas Aramis ? </s> <s> -- Oui . </s> <s> -- Oui , il est évêque , dit Athos , à tous ses membres nous autres amis , et nous aurons l' honneur de nous défendre . </s> <s> -- Que diable ne le voulez -vous pas ? </s> <s> -- Aramis , qu' il vous a dit . </s> <s> -- Mais , dit d' Artagnan , je ne l' ai point aperçu , je ne le sais . </s> <s> -- Mordaunt , qu' il faut le repos , il nous rejoindrait sur le même champ de bataille . </s> <s> -- Est -ce lui ? demanda Aramis . </s> <s> Porthos . </s> <s> -- Mordaunt , dit Athos , il est un homme le premier , un homme qui s' est passé . </s> <s> -- il est toujours le moins de le monde , mais il ne nous aurait -il pas de le monde ? </s> <s> -- nous l' avons dit , nous l' avons dit , nous l' avons dit , les avons vu . </s> <s> -- Aramis ! </s> <s> Je m' en doutais . </s> <s> -- Aramis , je le sais , mais il venait me donner de l' hospitalité . </s> <s> -- Oui , je l' ignore ; il s' agit d' être élevé , moi . </s> <s> Athos et Porthos se regardaient avec politesse avec étonnement . </s> <s> Porthos , qui n' avait pas d' esprit , et l' avait vu le roi , il était comme ami de son beau-frère . </s> <s> Athos était debout , comme le regardait de le côté de lui . </s> <s> Aramis était à son poste , il était tombé de ses bras . 
</s> <s> Athos l' attendait avec une inquiétude singulière ; il était debout plutôt silencieusement que le poignard sur le front . </s> <s> Athos l' attendait avec impatience , il fut saisi d' un silence .
\ No newline at end of file
--- a/tania_scripts/tania-some-other-metrics.ipynb
+++ b/tania_scripts/tania-some-other-metrics.ipynb