import string

# NOTE(review): the earlier nltk / sent_tokenize / word_tokenize imports were
# unused once the naive tokenizers elsewhere in this notebook replaced them,
# so they are not re-imported here.

# Sample parse in tab-separated column format:
#   token <TAB> pos-tag <TAB> chunk-labels <TAB> depth
sample_text = """
<s>\t<s>\t<s>\t0
Aramis\tnpp\t<nul>@@<nul>\t0
était\tv\t<nul>@@<nul>\t0
à\tp\t<nul>@@<nul>\t0
son\tdet\tNP@@<nul>\t0
poste\tnc\t<nul>@@<nul>\t1
,\tponct\t<nul>@@<nul>\t0
il\tcls-suj\tVN@@<nul>\t0
était\tv\t<nul>@@<nul>\t1
tombé\tvpp\t<nul>@@<nul>\t1
de\tp\tPP-DE_OBJ@@Sint-MOD\t1
ses\tdet\tNP@@<nul>\t2
bras\tnc\t<nul>@@<nul>\t3
.\tponct\t<nul>@@<nul>\t0
</s>\t</s>\t</s>\t0
"""

# Characters treated as punctuation.  string.punctuation is ASCII-only, so the
# common French typographic marks are added explicitly.
_PUNCT_CHARS = set(string.punctuation) | set("«»…‹›–—")


def compute_ttr_cleaned(text):
    """Compute the type/token ratio (TTR) of column-formatted text.

    Only the first tab-separated column (the token) of each non-empty line
    is used.  Tokens are lowercased and punctuation-only tokens are skipped.

    Parameters:
    - text: str, input in ``token<TAB>pos<TAB>...`` column format.

    Returns:
    - float, number of distinct tokens divided by total tokens
      (0.0 for empty input).
    """
    tokens = []
    for line in text.strip().splitlines():
        if line.strip():  # skip empty lines
            token = line.split('\t')[0].lower()
            # Bug fix: the original tested ``token not in string.punctuation``,
            # which is a substring test and therefore only filters
            # single-character ASCII marks; multi-character punctuation tokens
            # such as "..." or "--" slipped through and inflated the counts.
            if token and not all(ch in _PUNCT_CHARS for ch in token):
                tokens.append(token)

    if not tokens:
        return 0.0
    return len(set(tokens)) / len(tokens)


# Backward-compatible alias: a later notebook cell still calls compute_ttr(),
# which raised NameError once the function was renamed to compute_ttr_cleaned.
compute_ttr = compute_ttr_cleaned
def compute_ttr_by_pos(text):
    """Compute separate type/token ratios for verbs and for nouns.

    Input is column-formatted: ``token \\t pos \\t ...``.
    Verb tags: {'v', 'vpp', 'vpr'}; noun tags: {'nc', 'npp'}.
    Tokens are lowercased; single-character punctuation tokens are ignored.

    Returns:
    - dict with keys 'verb_ttr' and 'noun_ttr' (0.0 when a class is empty).
    """
    import string

    tag_groups = {
        'verb': {'v', 'vpp', 'vpr'},
        'noun': {'nc', 'npp'},
    }
    collected = {'verb': [], 'noun': []}

    for raw_line in text.strip().splitlines():
        if not raw_line.strip():
            continue
        fields = raw_line.split('\t')
        if len(fields) < 2:
            continue
        word, tag = fields[0].lower(), fields[1]
        if word in string.punctuation:  # skip single punctuation marks
            continue
        for group, tags in tag_groups.items():
            if tag in tags:
                collected[group].append(word)
                break  # tag sets are disjoint; first match is the only match

    def _ttr(items):
        # Type/token ratio of one token list; empty list maps to 0.0.
        return len(set(items)) / len(items) if items else 0.0

    return {
        'verb_ttr': _ttr(collected['verb']),
        'noun_ttr': _ttr(collected['noun']),
    }
def compute_nv_ratios(text):
    """Compute nominal/verb and verb/nominal ratios from column-formatted text.

    Only the second tab-separated column (the POS tag) is inspected.
    Verb tags: 'v', 'vpp', 'vpr'; noun tags: 'nc', 'npp'.

    Returns:
    - dict with 'nominal_verb_ratio' and 'verb_nominal_ratio';
      a zero denominator yields float('inf').
    """
    VERB_TAGS = frozenset(('v', 'vpp', 'vpr'))
    NOUN_TAGS = frozenset(('nc', 'npp'))

    verbs = 0
    nouns = 0
    for row in text.strip().splitlines():
        if not row.strip():
            continue
        columns = row.split('\t')
        if len(columns) < 2:
            continue
        tag = columns[1]
        # Tag sets are disjoint, so unconditional bool-adds match an if/elif.
        verbs += tag in VERB_TAGS
        nouns += tag in NOUN_TAGS

    return {
        'nominal_verb_ratio': nouns / verbs if verbs else float('inf'),
        'verb_nominal_ratio': verbs / nouns if nouns else float('inf'),
    }
🟦 **Flesch–Douma Index**\n", + "\n", + "An adaptation of the original Flesch Reading Ease formula for French.\n", + "\n", + "$\\text{Flesch–Douma} = 207 - (1.015 \\times \\text{ASL}) - (73.6 \\times \\text{ASW})$\n", + "\n", + "Where:\n", + "- **ASL** = Average Sentence Length = (number of words) / (number of sentences)\n", + "- **ASW** = Average Syllables per Word = (number of syllables) / (number of words)\n", + "\n", + "📊 **Interpretation**:\n", + "- 90–100: Very easy\n", + "- 60–70: Standard\n", + "- 30–50: Difficult\n", + "- < 30: Very difficult\n", + "\n", + "---\n", + "\n", + "#### 2. 🟨 **LIX Index**\n", + "\n", + "Used widely in French and other European languages. Measures sentence length and lexical complexity.\n", + "\n", + "$\\text{LIX} = \\frac{\\text{number of words}}{\\text{number of sentences}} + \\frac{100 \\times \\text{number of long words (≥7 chars)}}{\\text{number of words}}$\n", + "\n", + "📊 **Interpretation**:\n", + "- $<$ 30: Easy\n", + "- 30–40: Medium\n", + "- $>$ 50: Difficult\n", + "\n", + "---\n", + "\n", + "#### 3. 🟥 **Kandel–Moles Index**\n", + "\n", + "A linear formula proposed for French readability:\n", + "\n", + "$\\text{Kandel–Moles} = 0.1935 \\times \\text{number of words} + 0.1672 \\times \\text{number of syllables} - 1.779$\n", + "\n", + "📊 **Interpretation**:\n", + "- Higher values indicate more complex texts.\n", + "\n", + "---\n", + "\n", + "These formulas help estimate how easily a French reader can understand a given passage. The metrics can be used to analyze textbooks, articles, instructional materials, etc." 
# Readability metric functions, with no external resource downloads required.

import re


def naive_sentence_tokenize(text):
    """Split *text* into sentence fragments on runs of '.', '!' and '?'.

    The result may contain empty strings (e.g. after a trailing period);
    callers are expected to filter those out.
    """
    return re.split(r'[.!?]+', text.strip())


def naive_word_tokenize(text):
    """Lowercase *text* and return its list of ``\\w+`` word tokens."""
    return re.findall(r'\b\w+\b', text.lower())


def count_syllables(word):
    """Approximate the syllable count of a French word.

    Counts maximal groups of (accented) vowels; every word counts as at
    least one syllable.  Bug fix: the input is lowercased first, so
    capitalised words passed directly ("École") are counted correctly —
    the original vowel class was lowercase-only.
    """
    vowels = "aeiouyàâäéèêëîïôöùûüœ"
    groups = re.findall(rf"[{vowels}]+", word.lower())
    return max(1, len(groups))


def compute_french_readability(text):
    """Compute the Flesch-Douma, LIX and Kandel-Moles readability indices.

    Parameters:
    - text: str, plain French prose.

    Returns:
    - dict with keys "Flesch-Douma", "LIX" and "Kandel-Moles", each rounded
      to 2 decimals; all 0.0 when the text contains no sentence or no word.
    """
    sentences = [s for s in naive_sentence_tokenize(text) if s.strip()]
    # naive_word_tokenize already yields only \w+ tokens, so the original
    # per-token re.match(r"\w+", w) filter was always true and is dropped.
    words = naive_word_tokenize(text)

    num_sentences = len(sentences)
    num_words = len(words)
    num_syllables = sum(count_syllables(w) for w in words)
    num_long_words = sum(1 for w in words if len(w) >= 7)

    if num_sentences == 0 or num_words == 0:
        return {
            "Flesch-Douma": 0.0,
            "LIX": 0.0,
            "Kandel-Moles": 0.0,
        }

    asl = num_words / num_sentences   # average sentence length
    asw = num_syllables / num_words   # average syllables per word

    flesch_douma = 207 - (1.015 * asl) - (73.6 * asw)
    lix = asl + (100 * num_long_words / num_words)
    kandel_moles = 0.1935 * num_words + 0.1672 * num_syllables - 1.779

    return {
        "Flesch-Douma": round(flesch_douma, 2),
        "LIX": round(lix, 2),
        "Kandel-Moles": round(kandel_moles, 2),
    }
"ee59c294-fdcd-429e-a126-734480d1b0ba", + "metadata": {}, + "outputs": [], + "source": [ + "sample_text = \"\"\"\n", + "<s>\t<s>\t<s>\t0\n", + "Aramis\tnpp\t<nul>@@<nul>\t0\n", + "était\tv\t<nul>@@<nul>\t0\n", + "à\tp\t<nul>@@<nul>\t0\n", + "son\tdet\tNP@@<nul>\t0\n", + "poste\tnc\t<nul>@@<nul>\t1\n", + ",\tponct\t<nul>@@<nul>\t0\n", + "il\tcls-suj\tVN@@<nul>\t0\n", + "était\tv\t<nul>@@<nul>\t1\n", + "tombé\tvpp\t<nul>@@<nul>\t1\n", + "de\tp\tPP-DE_OBJ@@Sint-MOD\t1\n", + "ses\tdet\tNP@@<nul>\t2\n", + "bras\tnc\t<nul>@@<nul>\t3\n", + ".\tponct\t<nul>@@<nul>\t0\n", + "</s>\t</s>\t</s>\t0\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "id": "b6ae41ef-116f-473d-b3f3-115d90fe65b7", "metadata": {}, "outputs": [], "source": [ - "def compute_ttr(text):\n", + "import string\n", + "\n", + "def compute_ttr_cleaned(text):\n", " \"\"\"\n", " Compute the type/token ratio (TTR) from column-formatted text.\n", - " Only the first column is used (tokens).\n", + " - Only the first column is used (tokens).\n", + " - Tokens are lowercased.\n", + " - Punctuation tokens are ignored.\n", "\n", " Parameters:\n", " - text: str, the input text in column format\n", @@ -30,8 +95,9 @@ "\n", " for line in text.strip().splitlines():\n", " if line.strip(): # skip empty lines\n", - " token = line.split('\\t')[0]\n", - " tokens.append(token)\n", + " token = line.split('\\t')[0].lower()\n", + " if token not in string.punctuation:\n", + " tokens.append(token)\n", "\n", " if not tokens:\n", " return 0.0\n", @@ -42,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "id": "2a882cc9-8f9d-4457-becb-d2e26ab3f14f", "metadata": {}, "outputs": [ @@ -55,34 +121,303 @@ } ], "source": [ - "sample_text = \"\"\"\n", - "<s>\t<s>\t<s>\t0\n", - "Aramis\tnpp\t<nul>@@<nul>\t0\n", - "était\tv\t<nul>@@<nul>\t0\n", - "à\tp\t<nul>@@<nul>\t0\n", - "son\tdet\tNP@@<nul>\t0\n", - "poste\tnc\t<nul>@@<nul>\t1\n", - ",\tponct\t<nul>@@<nul>\t0\n", - 
"il\tcls-suj\tVN@@<nul>\t0\n", - "était\tv\t<nul>@@<nul>\t1\n", - "tombé\tvpp\t<nul>@@<nul>\t1\n", - "de\tp\tPP-DE_OBJ@@Sint-MOD\t1\n", - "ses\tdet\tNP@@<nul>\t2\n", - "bras\tnc\t<nul>@@<nul>\t3\n", - ".\tponct\t<nul>@@<nul>\t0\n", - "</s>\t</s>\t</s>\t0\n", - "\"\"\"\n", - "\n", "ttr = compute_ttr(sample_text)\n", "print(f\"Type/Token Ratio: {ttr:.3f}\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "8897dcc3-4218-4ee5-9984-17b9a6d8dce2", "metadata": {}, "outputs": [], + "source": [ + "def compute_ttr_by_pos(text):\n", + " \"\"\"\n", + " Compute type/token ratios for verbs and nouns in column-formatted text.\n", + " - Columns: token \\t pos \\t ...\n", + " - Verbs: POS in {'v', 'vpp', 'vpr'}\n", + " - Nouns: POS in {'nc', 'npp'}\n", + " - Tokens are lowercased.\n", + " - Punctuation is ignored.\n", + "\n", + " Returns:\n", + " - A dictionary with TTRs for verbs and nouns.\n", + " \"\"\"\n", + " import string\n", + "\n", + " verb_pos = {'v', 'vpp', 'vpr'}\n", + " noun_pos = {'nc', 'npp'}\n", + "\n", + " verb_tokens = []\n", + " noun_tokens = []\n", + "\n", + " for line in text.strip().splitlines():\n", + " if line.strip():\n", + " parts = line.split('\\t')\n", + " if len(parts) >= 2:\n", + " token = parts[0].lower()\n", + " pos = parts[1]\n", + "\n", + " # Ignore punctuation\n", + " if token in string.punctuation:\n", + " continue\n", + "\n", + " if pos in verb_pos:\n", + " verb_tokens.append(token)\n", + " elif pos in noun_pos:\n", + " noun_tokens.append(token)\n", + "\n", + " # Compute TTRs\n", + " ttr_verb = len(set(verb_tokens)) / len(verb_tokens) if verb_tokens else 0.0\n", + " ttr_noun = len(set(noun_tokens)) / len(noun_tokens) if noun_tokens else 0.0\n", + "\n", + " return {\n", + " 'verb_ttr': ttr_verb,\n", + " 'noun_ttr': ttr_noun\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1363f307-fa4b-43ba-93d5-2d1c11ceb9e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Verb TTR: 0.667\n", + "Noun TTR: 1.000\n" + ] + } + ], + "source": [ + "result = compute_ttr_by_pos(sample_text)\n", + "print(f\"Verb TTR: {result['verb_ttr']:.3f}\")\n", + "print(f\"Noun TTR: {result['noun_ttr']:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1362e192-514a-4a77-a8cb-5c012026e2bb", + "metadata": {}, + "outputs": [], + "source": [ + "def compute_nv_ratios(text):\n", + " \"\"\"\n", + " Compute nominal/verb and verb/nominal ratios from column-formatted text.\n", + " - Uses the second column (POS).\n", + " - Verbs: 'v', 'vpp', 'vpr'\n", + " - Nouns: 'nc', 'npp'\n", + "\n", + " Returns:\n", + " - Dictionary with 'nominal_verb_ratio' and 'verb_nominal_ratio'\n", + " \"\"\"\n", + " verb_pos = {'v', 'vpp', 'vpr'}\n", + " noun_pos = {'nc', 'npp'}\n", + "\n", + " verb_count = 0\n", + " noun_count = 0\n", + "\n", + " for line in text.strip().splitlines():\n", + " if line.strip():\n", + " parts = line.split('\\t')\n", + " if len(parts) >= 2:\n", + " pos = parts[1]\n", + " if pos in verb_pos:\n", + " verb_count += 1\n", + " elif pos in noun_pos:\n", + " noun_count += 1\n", + "\n", + " nominal_verb_ratio = noun_count / verb_count if verb_count else float('inf')\n", + " verb_nominal_ratio = verb_count / noun_count if noun_count else float('inf')\n", + "\n", + " return {\n", + " 'nominal_verb_ratio': nominal_verb_ratio,\n", + " 'verb_nominal_ratio': verb_nominal_ratio\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "544ff6aa-4104-4580-a01f-97429ffcc228", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nominal/Verb Ratio: 1.00\n", + "Verb/Nominal Ratio: 1.00\n" + ] + } + ], + "source": [ + "ratios = compute_nv_ratios(sample_text)\n", + "print(f\"Nominal/Verb Ratio: {ratios['nominal_verb_ratio']:.2f}\")\n", + "print(f\"Verb/Nominal Ratio: {ratios['verb_nominal_ratio']:.2f}\")" + ] + }, + { + "cell_type": 
"markdown", + "id": "d3a929bf-61cb-4ef8-bc00-6e2a59760d37", + "metadata": {}, + "source": [ + "\n", + "## Readability" + ] + }, + { + "cell_type": "markdown", + "id": "3fe25ff0-3f83-40fe-8420-08c09ffe98e6", + "metadata": {}, + "source": [ + "### 📚 French Readability MetricsTodo: verify this\n", + "\n", + "This notebook implements and explains three common **readability formulas** tailored for **French texts**:\n", + "\n", + "---\n", + "\n", + "#### 1. 🟦 **Flesch–Douma Index**\n", + "\n", + "An adaptation of the original Flesch Reading Ease formula for French.\n", + "\n", + "$\\text{Flesch–Douma} = 207 - (1.015 \\times \\text{ASL}) - (73.6 \\times \\text{ASW})$\n", + "\n", + "Where:\n", + "- **ASL** = Average Sentence Length = (number of words) / (number of sentences)\n", + "- **ASW** = Average Syllables per Word = (number of syllables) / (number of words)\n", + "\n", + "📊 **Interpretation**:\n", + "- 90–100: Very easy\n", + "- 60–70: Standard\n", + "- 30–50: Difficult\n", + "- < 30: Very difficult\n", + "\n", + "---\n", + "\n", + "#### 2. 🟨 **LIX Index**\n", + "\n", + "Used widely in French and other European languages. Measures sentence length and lexical complexity.\n", + "\n", + "$\\text{LIX} = \\frac{\\text{number of words}}{\\text{number of sentences}} + \\frac{100 \\times \\text{number of long words (≥7 chars)}}{\\text{number of words}}$\n", + "\n", + "📊 **Interpretation**:\n", + "- $<$ 30: Easy\n", + "- 30–40: Medium\n", + "- $>$ 50: Difficult\n", + "\n", + "---\n", + "\n", + "#### 3. 🟥 **Kandel–Moles Index**\n", + "\n", + "A linear formula proposed for French readability:\n", + "\n", + "$\\text{Kandel–Moles} = 0.1935 \\times \\text{number of words} + 0.1672 \\times \\text{number of syllables} - 1.779$\n", + "\n", + "📊 **Interpretation**:\n", + "- Higher values indicate more complex texts.\n", + "\n", + "---\n", + "\n", + "These formulas help estimate how easily a French reader can understand a given passage. 
# Readability metric functions, with no external resource downloads required.
# (Duplicate copy of the cell patched into the checkpoint notebook; same fixes.)

import re


def naive_sentence_tokenize(text):
    """Split *text* into sentence fragments on runs of '.', '!' and '?'.

    The result may contain empty strings (e.g. after a trailing period);
    callers are expected to filter those out.
    """
    return re.split(r'[.!?]+', text.strip())


def naive_word_tokenize(text):
    """Lowercase *text* and return its list of ``\\w+`` word tokens."""
    return re.findall(r'\b\w+\b', text.lower())


def count_syllables(word):
    """Approximate the syllable count of a French word.

    Counts maximal groups of (accented) vowels; every word counts as at
    least one syllable.  Bug fix: the input is lowercased first, so
    capitalised words passed directly ("École") are counted correctly —
    the original vowel class was lowercase-only.
    """
    vowels = "aeiouyàâäéèêëîïôöùûüœ"
    groups = re.findall(rf"[{vowels}]+", word.lower())
    return max(1, len(groups))


def compute_french_readability(text):
    """Compute the Flesch-Douma, LIX and Kandel-Moles readability indices.

    Parameters:
    - text: str, plain French prose.

    Returns:
    - dict with keys "Flesch-Douma", "LIX" and "Kandel-Moles", each rounded
      to 2 decimals; all 0.0 when the text contains no sentence or no word.
    """
    sentences = [s for s in naive_sentence_tokenize(text) if s.strip()]
    # naive_word_tokenize already yields only \w+ tokens, so the original
    # per-token re.match(r"\w+", w) filter was always true and is dropped.
    words = naive_word_tokenize(text)

    num_sentences = len(sentences)
    num_words = len(words)
    num_syllables = sum(count_syllables(w) for w in words)
    num_long_words = sum(1 for w in words if len(w) >= 7)

    if num_sentences == 0 or num_words == 0:
        return {
            "Flesch-Douma": 0.0,
            "LIX": 0.0,
            "Kandel-Moles": 0.0,
        }

    asl = num_words / num_sentences   # average sentence length
    asw = num_syllables / num_words   # average syllables per word

    flesch_douma = 207 - (1.015 * asl) - (73.6 * asw)
    lix = asl + (100 * num_long_words / num_words)
    kandel_moles = 0.1935 * num_words + 0.1672 * num_syllables - 1.779

    return {
        "Flesch-Douma": round(flesch_douma, 2),
        "LIX": round(lix, 2),
        "Kandel-Moles": round(kandel_moles, 2),
    }