diff --git a/README.md b/README.md
index 6c6b963bb50465ccee444ff9d097e2d3c6896d81..eb822822ecc01c33b79147b00740bd009aa0dd8d 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 The JSON object contains the following fields:
 
-- `lu`: The linguistic unit (LU) in the sentence.
+- `lu`: The lexical unit (LU)/trigger in the sentence.
 - `pos_lu`: The part of speech tag of the LU. (**corresponding to *f<sub>trigger</sub>***).
 - `lemma_lu`: The lemma or root form of the LU.
 - `frame`: The semantic frame associated with the LU.
@@ -21,6 +21,7 @@ The JSON object contains the following fields:
 - `predictions`: A dictionary containing model predictions and corresponding ROUGE-L scores. Each model has an entry with:
   - `answer_pred`: The predicted answer by the model.
   - `rougeL`: The ROUGE-L score of the prediction.
+  - `HScore`: The HScore of the prediction as computed in the paper: "Correct" as human annotation → 1, "Partiellement correct" → 0.5, and 0 otherwise. In the case of multiple annotations for the same question, the average HScore of all the annotations is taken.
 
 - `human_annot`: A dictionary containing human annotations for each model's output. Each model has an entry which is a list of annotations:
   - `annot`: The annotation identifier.
@@ -32,7 +33,7 @@ The JSON object contains the following fields:
 - `entropy_frame`: Entropy of the question's frame, common to all the examples of this frame. (**corresponding to *f<sub>entropy</sub>***).
 - `complexity_vector` : Each element corresponds to a complexity factor, 1 if it's "active" and the example therefore corresponds to the difficult group, 0 otherwise. Indexes correspond to the following complexity factors: 
   - `0`: ***f<sub>LU in q</sub>***
-  - `1`: ***f<sub>trigger/sub>***
+  - `1`: ***f<sub>trigger</sub>***
   - `2`: ***f<sub>dist</sub>***
   - `3`: ***f<sub>entropy</sub>***
   - `4`: ***f<sub>nb FEs</sub>***
diff --git a/calor_complexity.json b/calor_complexity.json
index 7ae6330c59a3f8476f04fca061f62b3bd9e09508..553afe6433bff22349b30b42b7dd5f6008889475 100644
--- a/calor_complexity.json
+++ b/calor_complexity.json
@@ -26,33 +26,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "état souverain et indépendant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "état souverain et indépendant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "souverain et indépendant",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "souverain et indépendant",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "république",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un état souverain et indépendant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un état souverain et indépendant, puis une République",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86,12 +93,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -154,42 +155,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Calme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Calme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Baonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge, bonds suprêmes d' isolés héroques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Calme affecté",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "calme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "calme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le calme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -282,33 +284,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projectiles explosibles ouchargées de matières fulminantes ou inflammables",
-                  "rougeL": 0.8749999999999999
+                  "rougeL": 0.8749999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les projectiles explosibles ou chargés de matières fulminantes ou inflammables, d'un poids inférieur à 400 grammes.",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -330,12 +339,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -410,33 +413,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "passer à l' insurrection armée.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur la proposition de passer à l'insurrection armée",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le parti bolchevique décida de passer à l'insurrection armée.",
-                  "rougeL": 0.46153846153846156
+                  "rougeL": 0.46153846153846156,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -464,12 +474,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -532,33 +536,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "passer à l' insurrection armée.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "passer à l'insurrection armée",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "passer à l' insurrection armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "passer à l'insurrection armée",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -598,12 +609,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -660,33 +665,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Italie,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Italie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -708,12 +720,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -788,33 +794,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -836,12 +849,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -910,33 +917,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "preparation générale insuffisante de toute notre armée",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -970,12 +984,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -1038,33 +1046,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la paix revient.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1098,12 +1113,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -1160,33 +1169,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la paix revient.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1220,12 +1236,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -1288,33 +1298,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "éviter l' affrontement",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "éviter l' affrontement",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "éviter l' affrontement en mettant fin à la grève",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mettant fin à la grève le 14 novembre 1918",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "d'éviter l'affrontement en mettant fin à la grève le 14 novembre 1918",
-                  "rougeL": 0.7368421052631577
+                  "rougeL": 0.7368421052631577,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mettant fin à la grève",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le comité d'Olten décidera de mettre fin à la grève",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -1342,12 +1359,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -1428,33 +1439,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Paul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un journal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un journal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un journal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le journal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1464,12 +1482,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -1556,33 +1568,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "son indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son indépendance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Belgique a perdu son indépendance.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1616,12 +1635,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -1696,33 +1709,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Albert Barbet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1756,12 +1776,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -1824,33 +1838,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la seule issue possible des conflits qui mettent en péril l' existence des Etats, leur liberté, leurs intérêts vitaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'horreur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -1866,12 +1887,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -1946,33 +1961,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "horreur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'horreur qu'elle inspire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2012,12 +2034,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -2086,33 +2102,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le bulletin d' information de l' Etat - major suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bulletin d' information de l' Etat - major suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bulletin d' information de l' Etat - major suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bulletin d' information de l' Etat - major suisse,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bulletin d' information de l' Etat - major suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le bulletin d' information de l' Etat - major suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bulletin d'information de l'Etat-major suisse.",
-                  "rougeL": 0.30769230769230765
+                  "rougeL": 0.30769230769230765,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2158,12 +2181,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -2214,33 +2231,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "groupes financiers importants d' Europe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les groupes financiers importants d'Europe",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2268,12 +2292,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -2337,33 +2355,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2397,12 +2422,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -2466,33 +2485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Russes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Russes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Russes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Russes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Allemands.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -2538,12 +2564,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -2595,33 +2615,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "réconciliation humaine et de recherche du bonheur social",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la liberté",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'harmonie entre les peuples",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -2649,12 +2676,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -2717,33 +2738,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "social",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "social",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "social",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2789,12 +2817,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -2845,33 +2867,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' auteur de l' attentat,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -2911,12 +2940,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -2973,33 +2996,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chacun des marchés de capitaux s' occupant ou étant susceptible de s' occuper des autres, d' avoir une influence sur eux, de leur rendre des services ou de leur créer des difficultés",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "marchés de capitaux",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chacun des marchés de capitaux s' occuper ou étant susceptible de s' occuper des autres, d' avoir une influence sur eux, de leur rendre des services ou de leur créer",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chacun des marchés de capitaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les affaires financières",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les marchés de capitaux",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les événements de 1911",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -3009,12 +3039,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -3107,33 +3131,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemagne,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'Allemagne",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas d'information dans l'article concernant l'aide de l'Italie.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -3173,12 +3204,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -3229,33 +3254,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemagne,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'Allemagne",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de mention de l'Italie dans l'article, donc on ne peut pas répondre à la question posée.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -3271,12 +3303,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -3357,33 +3383,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Toute la génération",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la génération",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -3417,12 +3450,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -3485,33 +3512,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "général, le chef de l' Etat - major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Conseil fédéral",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le général, le chef de l' Etat - major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le général, le chef de l' Etat - major,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Chambres fédérales",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le général, le chef de l' Etat - major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Chambres fédérales",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -3545,12 +3579,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -3619,33 +3647,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "300 moteurs par mois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des camions, des moteurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -3691,12 +3726,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -3741,33 +3770,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des camions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -3795,12 +3831,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -3869,33 +3899,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "anglais et écossais",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -3935,12 +3972,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -3991,33 +4022,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "protestants anglais et écossais",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4057,12 +4095,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -4119,33 +4151,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Armée allemande de la Meuse",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4185,12 +4224,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -4243,33 +4276,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Armée allemande de la Meuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'Armée allemande de la Meuse",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "celui qui voulait nous attaquer",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Armée allemande de la Meuse",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4297,12 +4337,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -4373,33 +4407,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en vain l' ennemi",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ennemi.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'ennemi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'ennemi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4433,12 +4474,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -4501,33 +4536,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la guerre Au Reichstag",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le SPD soutient la guerre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4561,12 +4603,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -4623,33 +4659,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "guerre Au Reichstag",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "soutien à la guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -4689,12 +4732,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -4751,33 +4788,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4817,12 +4861,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -4874,33 +4912,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -4940,12 +4985,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -5003,33 +5042,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire. La bataille a été courte, la victoire durable",
-                  "rougeL": 0.761904761904762
+                  "rougeL": 0.761904761904762,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "\"En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire. La bataille a été courte, la victoire durable \".",
-                  "rougeL": 0.761904761904762
+                  "rougeL": 0.761904761904762,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire. La bataille a été courte, la victoire durable",
-                  "rougeL": 0.761904761904762
+                  "rougeL": 0.761904761904762,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"En 1918, la classe ouvrière a perdu une bataille, mais remporté une victoire.\"",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -5045,12 +5091,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -5131,42 +5171,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cette bipolarisation de l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cette bipolarisation de l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de l' Europe",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Triple Alliance (Allemagne, Autriche - Hongrie, Italie) -la Triple Entente (France, Russie, Royaume-Uni)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Triple Alliance (Allemagne, Autriche - Hongrie, Italie) -la Triple Entente (France, Russie, Royaume-Uni)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -5253,33 +5294,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cette bipolarisation de l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bipolarisation de l' Europe",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la bipolarisation de l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -5301,12 +5349,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -5393,33 +5435,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "qu' on m' envoie illico sur le front et dans le régiment de mon beau-frère, le caporal Paul Delroze",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à titre d' athlète plus que complet et de lauréat de toutes les sociétés de gymnastique et de préparation militaire, je désire qu' on m' envoie illico sur le front et dans le",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "on m' envoie illico sur le front et dans le régiment de mon beau-frère, le caporal Paul Delroze",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "qu' on m' envoie illico sur le front et dans le régiment de mon beau-frère,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "qu'on m'envoie illico sur le front et dans le régiment de mon beau-frère, le caporal Paul Delroze.",
-                  "rougeL": 0.5185185185185185
+                  "rougeL": 0.5185185185185185,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "qu' on m' envoie illico sur le front et dans le régiment de mon beau-frère, le caporal Paul Delroze.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "qu'on m'envoie illico sur le front et dans le régiment+ de mon beau-frère, le caporal Paul Delroze.",
-                  "rougeL": 0.5185185185185185
+                  "rougeL": 0.5185185185185185,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -5441,12 +5490,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -5518,33 +5561,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Au XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Au XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "XVe siècle, XVIIIe et XIXe",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au XVIIIe et XIXe siècle",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -5572,12 +5622,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -5640,33 +5684,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Au XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "XVe siècle, XVIIIe et XIXe",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "XVIIIe et XIXe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au XVIIIe et XIXe siècle",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -5706,12 +5757,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -5798,33 +5843,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Europe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -5840,12 +5892,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -5926,33 +5972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "29 juin 1914",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.6
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "29 juin 1914",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.6
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "29 juin 1914",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.6
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur-le-champ.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur-le-champ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur-le-champ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6
                 },
                 "GPT-3.5": {
                   "answer_pred": "fut arrêté sur-le-champ",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.55
                 }
               },
               "human_annot": {
@@ -5998,7 +6051,7 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
+                "llama-2_lora": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -6040,7 +6093,7 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2_lora": [
+                "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -6082,7 +6135,7 @@
                     "rating": "Correct"
                   }
                 ],
-                "mixtral-8x7b": [
+                "ground_truth": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -6124,7 +6177,7 @@
                     "rating": "Correct"
                   }
                 ],
-                "ground_truth": [
+                "GPT-3.5": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -6143,7 +6196,7 @@
                   },
                   {
                     "annot": "annot_5",
-                    "rating": "Partiellement correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_6",
@@ -6166,91 +6219,49 @@
                     "rating": "Correct"
                   }
                 ],
-                "GPT-3.5": [
+                "FLAN-T5-large": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
                   },
                   {
                     "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_3",
-                    "rating": "Partiellement correct"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_6",
-                    "rating": "Correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_7",
-                    "rating": "Correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_9",
-                    "rating": "Correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_10",
-                    "rating": "Correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "FLAN-T5-large": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
-                "MT5-large": [
+                "MT5-large": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -6396,33 +6407,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le corbeau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "corbeau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ils mangeaient gravement en criant de temps en temps",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "corbeau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les corbeaux",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les corbeaux",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le corbeau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -6462,12 +6480,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -6526,15 +6538,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers les mauvaises terres",
                   "rougeL": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers les mauvaises terres",
                   "rougeL": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers les mauvaises terres",
                   "rougeL": 1.0
                 },
@@ -6606,33 +6618,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "groupes financiers importants d' Europe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -6660,12 +6679,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -6729,33 +6742,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "groupes financiers importants d' Europe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.55
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
                 "llama-2_lora": {
                   "answer_pred": "M. Germain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.05
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les groupes financiers importants d' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les groupes financiers importants d' Europe.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 }
               },
               "human_annot": {
@@ -6927,48 +6947,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -7137,16 +7115,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "question": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Question douteuse"
-                  }
-                ],
                 "no_answer": [
                   {
                     "annot": "annot_8",
@@ -7198,33 +7166,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cathédrale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les artilleurs allemands.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7252,12 +7227,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -7323,33 +7292,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "allemands",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "allemands",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "artilleurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "allemands",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "allemands",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les artilleurs allemands.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7377,12 +7353,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -7472,33 +7442,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa pensée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa pensée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa pensée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa pensée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "la manière de penser du général",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sa pensée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"sa pensée\" est exprimée de manière directe et incisive.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7538,12 +7515,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -7602,33 +7573,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la polémique éclate entre les deux régions linguistiques.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7662,12 +7640,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -7730,33 +7702,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7790,12 +7769,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -7852,33 +7825,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -7906,12 +7886,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -7974,33 +7948,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8034,12 +8015,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -8102,33 +8077,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une province allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une province allemande.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8156,12 +8138,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -8227,33 +8203,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "allemande",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "allemande",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "allemande",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemagne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Allemagne.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8263,12 +8246,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -8358,33 +8335,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 % de socialistes siégeront dès 1919",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "20 % de socialistes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le Conseil national.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -8424,12 +8408,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -8480,33 +8458,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Conseil national",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8540,12 +8525,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -8608,33 +8587,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le doux soleil d' avril,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le doux soleil d'avril",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8674,12 +8660,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -8730,33 +8710,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le doux soleil d' avril,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "doux soleil d' avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le doux soleil d'avril",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8796,12 +8783,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -8858,33 +8839,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soldats allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les assassins",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -8906,12 +8894,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -8981,33 +8963,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soldats allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "magistrats et les fonctionnaires de la police judiciaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les magistrats et les fonctionnaires de la police judiciaire.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9047,12 +9036,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -9110,33 +9093,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "réconciliation humaine et de recherche du bonheur social",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la liberté",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -9176,12 +9166,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -9232,33 +9216,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "social",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "social",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bonheur social",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9280,12 +9271,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -9360,33 +9345,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9426,12 +9418,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -9482,33 +9468,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9548,12 +9541,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -9610,33 +9597,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9670,12 +9664,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -9732,33 +9720,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nommé président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pedraic Pearse est nommé président du gouvernement provisoire et commandant en chef des forces républicaines.",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9798,12 +9793,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -9854,33 +9843,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "président du gouvernement provisoire",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "président du gouvernement provisoire et commandant en chef des forces républicaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pedraic Pearse est nommé président du gouvernement provisoire et commandant en chef des forces républicaines.",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -9908,12 +9904,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -9982,33 +9972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Suisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10036,12 +10033,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -10111,33 +10102,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les principes de justice gouvernent la conscience publique.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10165,12 +10163,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -10233,33 +10225,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la conscience publique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10299,12 +10298,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -10361,33 +10354,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10415,12 +10415,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -10483,33 +10477,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "en Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Ulster",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10525,12 +10526,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -10623,33 +10618,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "germanophilie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "germanophilie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "germanophilie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ces trois affaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les trois affaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ces trois affaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les trois affaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -10695,12 +10697,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -10758,42 +10754,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' affaire des colonels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' affaire des colonels : à la fin de l' année 1915, le bruit se répand rapidement que deux hauts officiers de l' Etat - major se sont rendus coupables de trahison",
-                  "rougeL": 0.2727272727272727
+                  "rougeL": 0.2727272727272727,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' affaire Hoffmann : Le conseiller fédéral Arthur Hoffmann et Robert Grimm (socialiste bernois et conseiller national) s' entremettent en 1917 pour obtenir l' adhésion de la Russie ",
-                  "rougeL": 0.1212121212121212
+                  "rougeL": 0.1212121212121212,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "- L' affaire des colonels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "affaires vont affecter les relations entre les deux parties du pays ; en outre, elles vont affecter l' image de la neutralité de la Suisse à l' étranger.",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La lettre au Conseil fédéral du 20 juillet 1915, L' affaire des colonels, L' affaire Hoffmann",
-                  "rougeL": 0.375
+                  "rougeL": 0.375,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les affaires des colonels et l'affaire Hoffmann",
-                  "rougeL": 0.36363636363636365
+                  "rougeL": 0.36363636363636365,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -10887,33 +10884,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "retrouver les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "retrouver les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "retrouver les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "retrouver les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les traditions qui nous guident en toutes matières sauf dans nos rapports avec l' Irlande",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -10959,12 +10963,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -11015,33 +11013,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "munitions et les vivres à distribuer",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "munitions et les vivres à distribuer",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les munitions et les vivres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les munitions et les vivres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "munitions et vivres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "munitions et les vivres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les munitions et les vivres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -11087,12 +11092,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -11145,33 +11144,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -11211,12 +11217,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -11267,33 +11267,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "preparation générale insuffisante de toute notre armée",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la préparation générale insuffisante de toute notre armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -11333,12 +11340,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -11395,33 +11396,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "baïonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge, bonds suprêmes d' isolés héroïques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "officiers qui se font tuer debout, baonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge, bonds suprêmes d' isolés héro",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "officiers qui se font tuer debout, baonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge, bonds suprêmes d' isolés héro",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "baïonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "tuer debout, baïonnettes plantées aux fusils par quelques sections obstinées, clairons qui sonnent la charge, bonds suprêmes d' isolés héroïques, rien n' y fait.",
-                  "rougeL": 0.06451612903225806
+                  "rougeL": 0.06451612903225806,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "feu",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le feu",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -11443,12 +11451,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -11523,33 +11525,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gallieni, à Joffre ou à Foch",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gallieni, à Joffre ou à Foch",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemagne. Sans ces malentendus, non seulement la bataille aurait été gagnée, mais la guerre tout entière.",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "contre l'Allemagne",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -11577,12 +11586,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -11642,33 +11645,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gallieni, à Joffre ou à Foch",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Allemagne",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -11708,12 +11718,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -11767,33 +11771,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des données sur les intentions militaires des Alliés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "données sur les intentions militaires des Alliés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bulletin d'information de l'état-major suisse contient plusieurs données sur les intentions militaires des Alliés.",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -11803,12 +11814,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -11889,33 +11894,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "données sur les intentions militaires des Alliés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur les intentions militaires des Alliés.",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -11949,12 +11961,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -12035,33 +12041,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"ses enfants\".",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -12089,12 +12102,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -12175,78 +12182,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "les enfants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La voix de la patrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.15
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"La voix de la patrie\".",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.05
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -12663,33 +12635,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Elle dénombrait avec fierté les pays qu' elle avait découverts et lancés dans le courant de la vie générale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Europe,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Europe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -12735,12 +12714,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -12791,33 +12764,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "M. Asquith",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ambassadeur d' Angleterre à Berlin",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'ambassadeur d'Angleterre",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -12839,12 +12819,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -12913,33 +12887,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ambassadeur d' Angleterre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "M. Asquith",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ambassadeur d' Angleterre à Berlin",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "M. Paul Cambon",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -12949,12 +12930,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -13041,33 +13016,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "répression",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "répression contre les bolcheviks",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mutineries dans la marine allemande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -13089,12 +13071,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -13163,42 +13139,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "répression",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bolcheviks",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mutineries dans la marine allemande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "répression organisée contre les bolcheviks en juillet",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la répression organisée contre les bolcheviks",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -13291,33 +13268,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -13357,12 +13341,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -13413,33 +13391,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de 1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -13467,12 +13452,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -13553,33 +13532,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "n' a pas l' air de le comprendre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il envisage, c' est de ne commencer à réunir les forces nécessaires à combattre le crime qu' au moment où il aura constaté que l' adversaire a déjà passé aux actes",
-                  "rougeL": 0.08
+                  "rougeL": 0.08,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ne commencer à réunir les forces nécessaires à combattre le crime qu' au moment où il aura constaté que l' adversaire a déjà passé aux actes (...)",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' adversaire a déjà passé aux actes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "ne pas jusque-là",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ne commencer à réunir les forces nécessaires à combattre le crime qu' au moment où il aura constaté que l' adversaire a déjà passé aux actes",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le gouvernement zurichois ne comprend pas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -13589,12 +13575,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -13681,42 +13661,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' année 1914, sur l' ensemble de la guerre et sur la fin de la guerre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vous trouvez sur Cliotexte des textes sur le début de la guerre : l' année 1914, sur l' ensemble de la guerre et sur la fin de la guerre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "G. Pedroncini, 1917, les mutineries de l' armée française, coll. Archives Julliard - Gallimard, 1968",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "G. Pedroncini,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' année 1914, sur l' ensemble de la guerre et sur la fin de la guerre, utiles aussi à ce chapitre.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les lecteurs/trices de Cliotexte.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -13809,33 +13790,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -13881,12 +13869,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -13937,33 +13919,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quelques exemplaires de l' original,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "exemplaires de l' original",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques exemplaires de l'original",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -13997,12 +13986,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -14059,33 +14042,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' original",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' original",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' original",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' original,",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soigneusement cachés par des Belges",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' original",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'original",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14107,12 +14097,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -14205,33 +14189,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ils choisissaient d' abord les jeunes sans barbe sur les joues. Ils se mettaient en boule et ils commençaient à manger cette chair d' entre le nez et la bouche, puis le bord",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les jeunes sans barbe sur les joues",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les rats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les rats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les rats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14265,12 +14256,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -14334,33 +14319,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": " Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14400,12 +14392,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -14456,33 +14442,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Elizabeth I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14522,12 +14515,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -14584,33 +14571,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14620,12 +14614,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -14709,33 +14697,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14763,12 +14758,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -14852,33 +14841,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Henry Floch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Henry Floch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Lucie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Henry Floch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Henry Floch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Henry Floch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Henry Floch et cinq de ses camarades (Durantet, Blanchard, Gay, Pettelet et Quinault)",
-                  "rougeL": 0.45454545454545453
+                  "rougeL": 0.45454545454545453,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -14906,12 +14902,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -14980,33 +14970,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quelques exemplaires de l' original,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "exemplaires de l' original",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -15046,12 +15043,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -15102,33 +15093,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques exemplaires de l' original",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quelques exemplaires de l' original,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soigneusement cachés par des Belges",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "exemplaires de l' original",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les exemplaires de l'original",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -15138,12 +15136,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -15230,33 +15222,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tous ces défauts provenaient de la préparation générale insuffisante de toute notre armée et devinrent particulièrement apparents au moment de notre première rencontre avec un adversaire plus habile que nous",
-                  "rougeL": 0.17391304347826084
+                  "rougeL": 0.17391304347826084,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reconnaissances défectueuses, de liaisons mal organisées, de mauvaises appréciations de la situation et de maints autres défauts de commandement et de manoeuvre",
-                  "rougeL": 0.09999999999999999
+                  "rougeL": 0.09999999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "retours défectueuses, de liaisons mal organisées, de mauvaises appréciations de la situation et de maints autres défauts de commandement et de manoeuvre",
-                  "rougeL": 0.10526315789473684
+                  "rougeL": 0.10526315789473684,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mauvaises appréciations de la situation et de maints autres défauts de commandement et de manoeuvre",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tous ces défauts provenaient de la préparation générale insuffisante de toute notre armée et devinrent particulièrement apparents au moment de notre première rencontre avec un adversaire plus habile que nous.",
-                  "rougeL": 0.17391304347826084
+                  "rougeL": 0.17391304347826084,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "devinrent particulièrement apparents au moment de notre première rencontre avec un adversaire plus habile que nous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les tristes résultats de reconnaissances défectueuses, de liaisons mal organisées, de mauvaises appréciations de la situation et de maints autres défauts de commandement et de manoeuvre.",
-                  "rougeL": 0.08695652173913042
+                  "rougeL": 0.08695652173913042,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -15302,12 +15301,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -15358,42 +15351,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -15480,33 +15474,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions (...) posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la diversité la plus extrême en ce qui concerne les questions posées par ce conflit",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la diversité la plus extrême",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -15528,12 +15529,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -15602,33 +15597,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "notre idéal de réconciliation humaine et de recherche du bonheur social ne sombre pas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "camarades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "camarades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "notre idéal de réconciliation humaine et de recherche du bonheur social ne sombre pas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les soldats de la liberté",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "notre idéal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Nous (les soldats de la liberté) recherchons le bonheur social.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -15662,12 +15664,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -15734,33 +15730,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les Vikings",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -15800,12 +15803,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -15862,33 +15859,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la guerre servira à la défense de la patrie, de la démocratie, à la libération des peuples opprimés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vos opinions, vos sentiments, vos souffrances",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vos opinions, vos sentiments, vos souffrances",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vos opinions, vos sentiments, vos souffrances.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "votre opinion, vos sentiments, vos souffrances",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vos opinions, vos sentiments, vos souffrances",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -15934,12 +15938,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -15990,33 +15988,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "inférieur à 400 grammes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "400 grammes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "400 grammes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "400 grammes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "400 grammes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "inférieur à 400 grammes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "inférieur à 400 grammes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -16032,12 +16037,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -16130,33 +16129,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "redonnez -nous l' offensive",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "redonnez -nous l' offensive comme vous l' avez donnée à ceux qui les ont enfoncés, ne nous laissez pas succomber à la teutonisation, délivrez -nous",
-                  "rougeL": 0.10526315789473684
+                  "rougeL": 0.10526315789473684,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' offensive comme vous l' avez donnée",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' offensive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "poing quotidien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' offensive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'offensive",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -16178,12 +16184,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -16258,33 +16258,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ulrich Wille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -16324,12 +16331,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -16386,33 +16387,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un matériel de plus en plus important",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -16440,12 +16448,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -16514,33 +16516,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Von Klück,",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -16568,12 +16577,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -16636,33 +16639,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Von Klück,",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Von Klück",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -16702,12 +16712,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -16764,42 +16768,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juillet - août",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "août",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -16892,33 +16897,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "toute question politique ou domestique se rapportant entièrement ou en partie à leurs affaires intérieures",
-                  "rougeL": 0.375
+                  "rougeL": 0.375,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "toute question politique ou domestique se rapportant entièrement ou en partie à leurs affaires intérieures",
-                  "rougeL": 0.375
+                  "rougeL": 0.375,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "que toute question politique ou domestique se rapportant entièrement ou en partie à leurs affaires intérieures",
-                  "rougeL": 0.375
+                  "rougeL": 0.375,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "seul ressort de la juridiction des États-Unis,",
-                  "rougeL": 0.22727272727272727
+                  "rougeL": 0.22727272727272727,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "exclusif de décider quelles questions dépendent de leur juridiction intérieure",
-                  "rougeL": 0.043478260869565216
+                  "rougeL": 0.043478260869565216,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "déclarent que toute question politique ou domestique se rapportant entièrement ou en partie à leurs affaires intérieures, incluant l' immigration, le travail, le trafic côtier, les tarifs, le commerce (...) et tout autre question domestique sont du seul ressort de la juridiction des États-Unis, et ne doivent pas, par ce traité, être soumis d' aucune façon à l' arbitrage ou à la considération du Conseil ou de l' Assemblée de la Société des Nations.",
-                  "rougeL": 0.9873417721518987
+                  "rougeL": 0.9873417721518987,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les États-Unis se réservent le droit exclusif de décider quelles questions dépendent de leur juridiction intérieure.",
-                  "rougeL": 0.07843137254901959
+                  "rougeL": 0.07843137254901959,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -16958,12 +16970,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -17020,33 +17026,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "si, dans une guerre entre l' Allemagne et la Russie, il restera neutre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "si, dans une guerre entre l' Allemagne et la Russie, il restera neutre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "si, dans une guerre entre l' Allemagne et la Russie, il restera neutre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "remise des forteresses de Toul et Verdun",
-                  "rougeL": 0.10526315789473685
+                  "rougeL": 0.10526315789473685,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "si, ce qu' il n' y a pas lieu de supposer, le gouvernement français déclare qu' il reste neutre, je prie votre Excellence de déclarer au gouvernement français que nous devons exiger comme gage de sa neutralité la remise des forteresses de Toul et Verdun, que nous occuperons et que nous restituerons après que la guerre avec la Russie est terminée.",
-                  "rougeL": 0.20833333333333334
+                  "rougeL": 0.20833333333333334,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "remise des forteresses de Toul et Verdun",
-                  "rougeL": 0.10526315789473685
+                  "rougeL": 0.10526315789473685,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "La remise des forteresses de Toul et Verdun.",
-                  "rougeL": 0.10526315789473685
+                  "rougeL": 0.10526315789473685,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -17092,12 +17105,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -17148,33 +17155,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "On trouve de la terre partout, dans ses poches, dans son mouchoir, dans ses habits, dans ce qu' on mange.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "touche que j'ai",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "on",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le protagoniste qui retrouve de la terre partout dans ses poches, dans son mouchoir, dans ses habits, dans ce qu'on mange est mentionné dans l'article.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -17220,12 +17234,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -17276,33 +17284,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "examen des autres questions territoriales et politiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "examen des autres questions territoriales et politiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "examen des autres questions territoriales et politiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' examen des autres questions territoriales et politiques,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "diriger l'examen des autres questions territoriales et politiques",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' examen des autres questions territoriales et politiques, et notamment celles relatives à l' Arménie, aux Etats balkaniques et aux territoires faisant partie de l' ancien royaume de Pologne",
-                  "rougeL": 0.37037037037037035
+                  "rougeL": 0.37037037037037035,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le même esprit d'équité et de justice commande l'examen des autres questions territoriales et politiques",
-                  "rougeL": 0.5000000000000001
+                  "rougeL": 0.5000000000000001,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -17324,12 +17339,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -17404,33 +17413,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' auteur de l' attentat,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -17458,12 +17474,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -17526,33 +17536,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cabrinovic",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -17592,12 +17609,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -17654,33 +17665,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ennemi.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'ennemi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ennemi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On cherche en vain l'ennemi.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -17696,12 +17714,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -17800,33 +17812,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fête spontanée, organisée par des pacifistes marginaux et des cercles de la jeunesse de gauche, pour fêter la victoire des bolcheviques en Russie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fête spontanée, organisée par des pacifistes marginaux et des cercles de la jeunesse de gauche, pour fêter la victoire des bolcheviques en Russie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fête spontanée, organisée par des pacifistes marginaux et des cercles de la jeunesse de gauche, pour fêter la victoire des bolcheviques en Russie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "émeute de Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "émeute de Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "émeute de Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'émeute de Zurich",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -17866,12 +17885,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -17929,33 +17942,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vous trouvez sur Cliotexte d' autres textes de Stephan Zweig",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cliotexte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cliotexte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -17971,12 +17991,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -18057,15 +18071,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le gros des forces sur les communications principales de l' ennemi afin de l' obliger soit à abandonner rapidement ses fronts actuels, soit à accepter de nouveaux combats dans les plus mauvaises conditions",
                   "rougeL": 0.19999999999999998
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "toutes les forces ennemies",
                   "rougeL": 0.3333333333333333
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "toutes les forces ennemies",
                   "rougeL": 0.3333333333333333
                 },
@@ -18139,33 +18153,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "confiance et la gratitude",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "spectacle qu' il a donné et le loyal concours qu' il nous a apporté",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le spectacle qu' il a donné et le loyal concours qu' il nous a apporté",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "loyal concours",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le loyal concours",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -18199,12 +18220,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -18267,33 +18282,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1920",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18315,12 +18337,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -18386,33 +18402,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1920",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1919 déjà",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18440,12 +18463,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -18511,33 +18528,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pedraic Pearse",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18571,12 +18595,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -18639,33 +18657,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Amérique du Sud",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18693,12 +18718,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -18767,33 +18786,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "\" Vous, mes petites, vous ne vous marierez pas, il faudra travailler \"",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"Vous, mes petites, vous ne vous marierez pas, il faudra travailler\"",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18827,12 +18853,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -18889,33 +18909,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il y avait de l' amertume dans sa voix.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vous, mes petites, vous ne vous marierez pas, il faudra travailler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"Vous, mes petites, vous ne vous marierez pas, il faudra travailler\"",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -18949,12 +18976,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -19017,33 +19038,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la mobilisation des femmes et des enfants",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'armée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19083,12 +19111,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -19139,33 +19161,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "espoir qu' une prompte victoire dissipera ce cauchemar",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'armée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19205,12 +19234,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -19267,33 +19290,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la paix revient.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19327,12 +19357,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -19389,33 +19413,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la paix revient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la paix revient.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19449,12 +19480,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -19517,33 +19542,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de l' Etat - major général",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de l' État",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de l' Etat - major général",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de l' Etat - major général.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chef de l' Etat - major général",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chef de l' Etat - major général",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "chef de l'Etat-major général",
-                  "rougeL": 0.4000000000000001
+                  "rougeL": 0.4000000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19571,12 +19603,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -19639,33 +19665,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Etat - major général",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' Etat",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Etat - major général",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Etat - major général.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Etat - major général",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Etat - major général",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Theophil von Sprecher est nommé chef de l'Etat-major général.",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19681,12 +19714,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -19767,33 +19794,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au moment où il aura constaté que l' adversaire a déjà passé aux actes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "au moment où il aura constaté que l' adversaire a déjà passé aux actes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quand la révolte a atteint son plein développement",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au moment où il aura constaté que l'adversaire a déjà passé aux actes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19809,12 +19843,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -19889,33 +19917,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trop tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "toujours trop tard",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -19943,12 +19978,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -20017,33 +20046,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -20299,48 +20335,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -20463,33 +20457,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des sociétés pour la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -20511,12 +20512,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -20591,33 +20586,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -20651,12 +20653,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -20713,33 +20709,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hoffmann",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hoffmann.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -20773,12 +20776,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -20841,33 +20838,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des données sur les intentions militaires des Alliés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "données sur les intentions militaires des Alliés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bulletin d'information de l'Etat-major suisse, qui contient plusieurs données sur les intentions militaires des Alliés.",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -20907,12 +20911,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -20963,33 +20961,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les intentions militaires des Alliés",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plusieurs données sur les intentions militaires des Alliés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les intentions militaires des Alliés.",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21029,12 +21034,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -21091,33 +21090,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21151,12 +21157,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -21210,33 +21210,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1908 - 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21270,12 +21277,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -21335,33 +21336,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "août",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "août",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21401,12 +21409,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -21463,33 +21465,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21523,12 +21532,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -21587,33 +21590,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "professeur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "professeur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "professorat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21653,12 +21663,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -21729,33 +21733,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' armée ne serait plus en mesure de prévenir la sédition ou de la tuer dans l' oeuf",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "révolutionnaires et pour le pouvoir",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la sédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la révolte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la sédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la sédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21771,12 +21782,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -21875,33 +21880,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sans trop le connaître",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le peuple, sans trop le connaître, c'est-ce que le peuple connaît, c'est-ce que le peuple connaît, c'est-ce que le peuple connaît, c'est-ce que le peuple connaît, c'est-ce que",
-                  "rougeL": 0.05555555555555555
+                  "rougeL": 0.05555555555555555,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Tchèques se flattaient qu' il leur serait secourable, à cause de l' influence que sa femme, issue d' une de vieilles familles de Bohême, exerçait sur son esprit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son fanatisme clérical et de son avarice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le peuple ne connaît pas le prince",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fanatisme clérical et de son avarice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le peuple connaît son fanatisme clérical et son avarice",
-                  "rougeL": 0.9
+                  "rougeL": 0.9,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -21917,12 +21929,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -22007,33 +22013,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "je collais à l' intérieur de cette reliure",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "je collais à l' intérieur de cette reliure",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "je collais à l' intérieur de cette reliure",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "je collais à l' intérieur de cette reliure,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "moi, je collais à l' intérieur de cette reliure, pour cacher les bouts de l' élastique, mes petits carrés de papier rose.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "je",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la personne non spécifiée dans cet extrait de l'article",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22049,12 +22062,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -22136,33 +22143,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22190,12 +22204,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -22277,33 +22285,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "redonnez -nous l' offensive",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "redonnez -nous l' offensive comme vous l' avez donnée à ceux qui les ont enfoncés",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' offensive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' offensive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "poing quotidien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' offensive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'offensive",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -22343,12 +22358,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -22405,33 +22414,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Plusieurs jeunes et jolies Arméniennes liées ensemble",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Arméniennes liées ensemble",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plusieurs jeunes et jolies Arméniennes liées ensemble",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plusieurs jeunes et jolies Arméniennes",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des jeunes et jolies Arméniennes",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "jeunes et jolies Arméniennes",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Plusieurs jeunes et jolies Arméniennes",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22465,12 +22481,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -22534,33 +22544,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "20 obus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 75 tire 20 obus à la minute.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22594,12 +22611,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -22656,33 +22667,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "20 obus à la minute",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des obus",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -22716,12 +22734,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -22784,33 +22796,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables, d' un poids inférieur à 400 grammes",
-                  "rougeL": 0.8823529411764706
+                  "rougeL": 0.8823529411764706,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les projectiles explosibles ou chargés de matières fulminantes ou inflammables, d' un poids inférieur à 400 grammes",
-                  "rougeL": 0.8823529411764706
+                  "rougeL": 0.8823529411764706,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22820,12 +22839,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -22906,33 +22919,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les projectiles explosibles ou chargés de matières fulminantes ou inflammables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -22972,12 +22992,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -23034,33 +23048,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les poches, dans son mouchoir, dans ses habits",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "hantise, un cauchemar de terre et de boue, et vous ne sauriez avoir idée de la touche que j' ai - mon fusil a l' air d'",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une hantise, un cauchemar de terre et de boue",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "terre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la terre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On trouve de la terre partout, dans ses poches, dans son mouchoir, dans ses habits, dans ce qu'on mange.",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -23082,12 +23103,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -23164,33 +23179,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les belligérants n' ont pas le droit d' utiliser son territoire comme champ de bataille ou voie de passage ; ils ne peuvent pas le soumettre à un blocus économique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "droit : les belligérants n' ont pas le droit d' utiliser son territoire comme champ de bataille ou voie de passage ; ils ne peuvent pas le soumettre à un blocus économique",
-                  "rougeL": 0.9714285714285714
+                  "rougeL": 0.9714285714285714,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Etat ne peut être rendu responsable de manifestations d' opinions privées de ses ressortissants",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "champ de bataille ou voie de passage",
-                  "rougeL": 0.38095238095238093
+                  "rougeL": 0.38095238095238093,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "droit : les belligérants n' ont pas le droit d' utiliser son territoire comme champ de bataille ou voie de passage ; ils ne peuvent pas le soumettre à un blocus économique.",
-                  "rougeL": 0.9714285714285714
+                  "rougeL": 0.9714285714285714,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "seuls les organes officiels de l' Etat peuvent être rendus responsables de manifestations d' opinion ; par contre, l' Etat ne peut être rendu responsable de manifestations d' opinions privées de ses ressortissants. On ne peut donc pas mettre d' entraves à la liberté de presse. S' il a des devoirs à accomplir, un Etat neutre a aussi des droits : les belligérants n' ont pas le droit d' utiliser son territoire comme champ de bataille ou voie de passage ; ils ne peuvent pas le soumettre à un blocus économique.",
-                  "rougeL": 0.4722222222222222
+                  "rougeL": 0.4722222222222222,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un Etat neutre a aussi des droits ",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -23236,12 +23258,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -23293,37 +23309,44 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.65
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Il y a deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Suisse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Il y a deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "llama-2_lora": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.65
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux ans auparavant",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.8
                 },
                 "GPT-3.5": {
                   "answer_pred": "il y a deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
+                "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -23365,7 +23388,7 @@
                     "rating": "Correct"
                   }
                 ],
-                "mixtral-8x7b": [
+                "MT5-large": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -23400,14 +23423,14 @@
                   },
                   {
                     "annot": "annot_9",
-                    "rating": "Correct"
+                    "rating": "Partiellement correct"
                   },
                   {
                     "annot": "annot_10",
-                    "rating": "Correct"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "MT5-large": [
+                "llama-2_lora": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -23449,7 +23472,49 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2_lora": [
+                "FLAN-T5-large": [
+                  {
+                    "annot": "annot_1",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_2",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_3",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_4",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_5",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_6",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_7",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_8",
+                    "rating": "Erreur inacceptable"
+                  },
+                  {
+                    "annot": "annot_9",
+                    "rating": "Erreur acceptable (\"humaine\")"
+                  },
+                  {
+                    "annot": "annot_10",
+                    "rating": "Erreur inacceptable"
+                  }
+                ],
+                "GPT-3.5": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -23484,140 +23549,56 @@
                   },
                   {
                     "annot": "annot_9",
-                    "rating": "Partiellement correct"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
+                    "rating": "Partiellement correct"
                   }
                 ],
-                "FLAN-T5-large": [
+                "Camembert_baseline": [
                   {
                     "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_3",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Erreur acceptable (\"humaine\")"
                   },
                   {
                     "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
+                    "rating": "Correct"
                   },
                   {
                     "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
+                    "rating": "Correct"
                   }
                 ],
-                "GPT-3.5": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
-                "Camembert_baseline": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
-                "T5-large": [
+                "T5-large": [
                   {
                     "annot": "annot_1",
                     "rating": "Correct"
@@ -23758,33 +23739,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "missionnaires et ses diplomates",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Japonais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les missionnaires et diplomates japonais.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -23800,12 +23788,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -23881,33 +23863,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -23935,12 +23924,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -24028,33 +24011,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sans trop le connaître",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tchèques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Tchèques se flattaient qu' il leur serait secourable, à cause de l' influence que sa femme, issue d' une de vieilles familles de Bohême, exerçait sur son esprit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ne l' aimait pas.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le jugeant sur ce qu' on rapportait de son fanatisme clérical et de son avarice",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son fanatisme clérical et de son avarice",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le peuple ne le connaît pas.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -24100,12 +24090,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -24156,33 +24140,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les moteurs à essence.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -24216,12 +24207,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -24278,33 +24263,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les moteurs à essence",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -24338,12 +24330,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -24406,33 +24392,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -24460,12 +24453,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -24529,33 +24516,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un nouveau mode de civilisation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -24589,12 +24583,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -24688,33 +24676,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Elle dénombrait avec fierté les pays qu' elle avait découverts et lancés dans le courant de la vie générale, les peuples qu' elle avait nourris de sa substance et façonnés à son image, les société",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Europe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -24748,12 +24743,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -24816,33 +24805,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tué à la guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'est pas indiqué dans l'article comment Albert Barbet a été tué.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -24882,12 +24878,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -24938,33 +24928,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de mention de l'événement au cours duquel Albert Barbet est tué dans l'article.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -24992,12 +24989,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -25066,33 +25057,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le monstre germanique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25126,12 +25124,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -25194,33 +25186,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre (3) vont sans doute faire éclore en Russie.",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "affaires que le retour de la paix et la liquidation des dépenses de la guerre vont sans doute faire éclore en Russie",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre vont sans doute faire éclore en Russie",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "aux affaires que le retour de la paix et la liquidation des dépenses de la guerre",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25266,12 +25265,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -25322,33 +25315,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25388,12 +25388,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -25444,33 +25438,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean Jaurès",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean Jaurès",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean Jaurès",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le cercueil de Jean Jaurès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25498,12 +25499,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -25572,33 +25567,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le bourrage de crâne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bourrage de crâne exactions",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bourrage de crâne exactions",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bourrage de crâne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "bourrage de crâne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "exactions inventées de civils belges",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les exactions.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -25644,12 +25646,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -25700,15 +25696,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "France",
                   "rougeL": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "France",
                   "rougeL": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
                   "rougeL": 1.0
                 },
@@ -25775,33 +25771,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25835,12 +25838,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -25904,33 +25901,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -25940,12 +25944,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -26032,33 +26030,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "derniers temps qui précèdent l' agression nazie contre la Pologne.",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après son décès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -26104,12 +26109,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -26154,33 +26153,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "après son décès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "après son décès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "derniers temps qui précèdent l' agression nazie contre la Pologne.",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après son décès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "derniers temps qui précèdent l' agression nazie contre la Pologne",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le texte a été édité et une analyse du manuscrit a confirmé que celui-ci date bien des derniers temps qui précèdent l' agression nazie contre la Pologne.",
-                  "rougeL": 0.6875000000000001
+                  "rougeL": 0.6875000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -26190,12 +26196,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -26294,33 +26294,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ils",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armée allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armée allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ils",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Aucune séquence de mots dans l'article ne répond à la question \"Qui mange de la paille ?\".",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -26330,12 +26337,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -26434,42 +26435,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ces moribonds",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ces moribonds",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Épuisés, à bout de souffle, luttant un contre deux depuis des jours, n' ayant pas le temps de dormir, n' ayant pas le temps de manger, ne marchant que par le prodige ",
-                  "rougeL": 0.09523809523809523
+                  "rougeL": 0.09523809523809523,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ces moribonds",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les plus humbles au plus illustres",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ces hommes-là",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les hommes/moribonds",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -26562,33 +26564,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "profiter de l' avantage du terrain et de l' occupation pour réserver ses forces",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' avantage de la défensive, qui est de profiter de l' avantage du terrain et de l' occupation pour réserver ses forces",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' avantage de la défensive, qui est de profiter de l' avantage du terrain et de l' occupation pour réserver ses forces",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' avantage de la défensive, qui est de profiter de l' avantage du terrain et de l' occupation pour réserver ses forces,",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'avantage de la défensive, qui est de profiter de l' avantage du terrain et de l' occupation pour réserver ses forces, se trouve perdu en partie, car on est obligé alors d' employer à la défensive passive des troupes qu' on aurait pu employer en meilleure place.",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' avantage de la défensive",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'avantage de la défensive a perdu.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -26610,12 +26619,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -26717,33 +26720,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "juste pour me mettre à table",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "J'",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' auteur ne raconte pas sa propre enfance, mais celle d' une petite fille à Paris pendant la Première Guerre, et qui a treize ou quatorze ans en 1918",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "J' étais entrée aux Piles électriques.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quand je rencontrais une voiture à chevaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "J' arrivais à midi et demie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la petite fille",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -26753,12 +26763,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -26845,33 +26849,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Autriche contre la Russie",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Autriche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Autriche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Autriche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Autriche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Autriche",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -26899,12 +26910,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -26973,33 +26978,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemand sérieux, grave, allant toujours au fond des choses et trop indifférent à leur apparence",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemand sérieux",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemand",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemand",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Allemand sérieux, grave, allant toujours au fond des choses et trop indifférent à leur apparence.",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Allemand sérieux, grave, allant toujours au fond des choses et trop indifférent à leur apparence",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Allemand sérieux, grave, allant toujours au fond des choses et trop indifférent à leur apparence.",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -27015,12 +27027,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -27101,33 +27107,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -27161,12 +27174,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -27229,33 +27236,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à larges coups d' ailes tranquilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Quand l'aube n'était pas encore bien débarrassée, les corbeaux arrivaient.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -27469,48 +27483,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -27675,33 +27647,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Quand l' aube n' était pas encore bien débarrassée,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à larges coups d' ailes tranquilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' aube",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Quand l'aube n'était pas encore bien débarrassée",
-                  "rougeL": 0.6250000000000001
+                  "rougeL": 0.6250000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -27711,12 +27690,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -27803,33 +27776,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'infanterie ennemie",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -27869,12 +27849,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -27925,33 +27899,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "allemande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "L' infanterie ennemie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'infanterie ennemie",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -27973,12 +27954,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -28053,33 +28028,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28095,12 +28077,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -28176,42 +28152,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "première ligne",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "première ligne",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une tranchée de première ligne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -28310,33 +28287,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une statuette en bronze et or, datant de l'époque d'Hammurabi.",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28358,12 +28342,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -28432,33 +28410,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28498,12 +28483,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -28560,33 +28539,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Howard Carter (1874 - 1939)",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28620,12 +28606,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -28682,33 +28662,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Howard Carter (1874 - 1939)",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28754,12 +28741,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -28840,33 +28821,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jeunesse Galba",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jeunesse Galba",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -28900,12 +28888,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -28968,33 +28950,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans le nord",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans un environnement de collines et de vallées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans un environnement de collines et de vallées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans un environnement de collines et de vallées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sud",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans le nord",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le nord.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -29010,12 +28999,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -29096,33 +29079,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -29156,12 +29146,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -29224,33 +29208,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -29284,12 +29275,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -29346,33 +29331,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -29406,12 +29398,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -29474,33 +29460,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jules César.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -29714,48 +29707,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -29926,33 +29877,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les jardins suspendus de la reine Sémiramis une des sept merveilles du monde",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "apogée vers le VIe siècle av. J.-C durant le règne de Nabuchodonosor II",
-                  "rougeL": 0.21052631578947367
+                  "rougeL": 0.21052631578947367,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son apogée vers le VIe siècle av. J.-C. durant le règne de Nabuchodonosor II",
-                  "rougeL": 0.21052631578947367
+                  "rougeL": 0.21052631578947367,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "jardins suspendus de la reine Sémiramis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "apogée vers le VIe siècle av. J.-C.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Babylone",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Babylone connaît son apogée vers le VIe siècle av. J.-C.",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -29992,12 +29950,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -30054,33 +30006,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son père",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il perd rapidement son père.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -30108,12 +30067,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -30182,33 +30135,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "651, lors de la conquête musulmane de la Perse",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Yazdgard III est tué lors de la conquête musulmane de la Perse.",
-                  "rougeL": 0.5882352941176471
+                  "rougeL": 0.5882352941176471,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -30248,12 +30208,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -30311,33 +30265,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Phéniciens",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Phéniciens",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Phéniciens",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' alphabet des Phéniciens.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' alphabet des Phéniciens",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "alphabet des Phéniciens",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'alphabet des Phéniciens",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -30377,12 +30338,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -30440,42 +30395,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "256 fantassins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "256 fantassins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "256 fantassins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "256 fantassins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "phalange macédonienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fantassins",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la phalange macédonienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -30568,33 +30524,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fabrication de la bière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la fabrication de la bière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fabrication de la bière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ninkasi a créé la bière.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -30622,12 +30585,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -30690,33 +30647,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ninkasi a créé la bière.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -30744,12 +30708,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -30818,33 +30776,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la population galvanisée par sainte Geneviève",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sainte Geneviève",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -30884,12 +30849,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -30947,33 +30906,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Arius un prêtre catholique d' Alexandrie",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Arius un prêtre catholique d' Alexandrie",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31013,12 +30979,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -31069,33 +31029,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "d' Arius un prêtre catholique d' Alexandrie",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "d' Arius un prêtre catholique d' Alexandrie",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Arius un prêtre catholique d' Alexandrie",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' Arius",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Arius, un prêtre catholique d' Alexandrie, d' origine libyenne, de la fin du IIe et du début du IVe siècle.",
-                  "rougeL": 0.7586206896551725
+                  "rougeL": 0.7586206896551725,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Arius un prêtre catholique d' Alexandrie, d' origine libyenne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'Alexandrie, d'origine libyenne",
-                  "rougeL": 0.39999999999999997
+                  "rougeL": 0.39999999999999997,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31105,12 +31072,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -31197,33 +31158,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-Baptiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31257,12 +31225,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -31325,33 +31287,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Enki",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Namma",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Enki",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Enki",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Enki (son fils ou son petit-fils)",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Enki",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À Enki",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31391,12 +31360,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -31453,33 +31416,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "GPT-3.5": {
                   "answer_pred": "le matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31735,48 +31705,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -31899,33 +31827,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "villa Giulia",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matériel archéologique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le matériel archéologique a été trouvé à l'extérieur de la Ville.",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -31953,12 +31888,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -32027,33 +31956,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "celui qu' elle choisirait lors du festin auquel il convia ses hôtes grecs",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "festin",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "festin",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "festin",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le festin auquel il convia ses hôtes grecs",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les noces de Gyptis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au festin",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -32081,12 +32017,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -32149,33 +32079,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lors du festin auquel il convia ses hôtes grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lors du festin",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lors du festin",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lors du festin auquel il convia ses hôtes grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "durant le banquet",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au cours du banquet",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "lors du festin",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -32215,12 +32152,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -32277,42 +32208,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Zeugma",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Shapur Ier attaque Zeugma.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -32399,33 +32331,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Zeugma",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Zeugma",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sassanides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Zeugma",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Zeugma",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -32453,12 +32392,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -32527,33 +32460,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "eaux de pluie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'eau des deux fleuves, le Tigre et l'Euphrate.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -32593,12 +32533,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -32655,33 +32589,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "avant que Lutèce ne soit fondée par les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "IIIe siècle av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Avant que Lutèce ne soit fondée par les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Avant que Lutèce ne soit fondée par les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "IIIe siècle av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "IIIe siècle av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -32703,12 +32644,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -32795,33 +32730,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Mars",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mars",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Mars, fils de Jupiter et de Junon",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dieu de la guerre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mars est fougueux, jeune, irréfléchi : il représente la jeunesse, à la différence de Saturne ou Jupiter qui représentent l' âge mûr. Mars est en effet aussi le dieu de l' agriculture, de la fertilité et de la végétation.",
-                  "rougeL": 0.07142857142857142
+                  "rougeL": 0.07142857142857142,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Mars (mythologie)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Mars, fils de Jupiter et de Junon",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -32855,12 +32797,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -32923,33 +32859,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la plus importante collection au monde d' objets minoens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la plus importante collection au monde d' objets minoens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la plus importante collection au monde d' objets minoens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fresques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une grande partie des fresques trouvées dans les fouilles du palais de Cnossos",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le musée archéologique d' Héraklion trouve la plus importante collection au monde d' objets minoens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On y trouve la plus importante collection au monde d' objets minoens, dont une grande partie des fresques trouvées dans les fouilles du palais de Cnossos.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -32959,12 +32902,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -33055,33 +32992,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Phrygie",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Phrygie",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33103,12 +33047,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -33177,33 +33115,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi de Phrygie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33237,12 +33182,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -33305,33 +33244,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "combats de gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33371,12 +33317,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -33428,33 +33368,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les gladiateurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -33488,12 +33435,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -33557,33 +33498,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chimie, la physique et l' astronomie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chimie, la physique et l' astronomie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chimie, la physique et l' astronomie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chimie, la physique et l' astronomie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chimie, physique et astronomie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chimie, physique et astronomie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la chimie, la physique et l' astronomie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33611,12 +33559,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -33703,33 +33645,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sainte Foy Sainte Foy",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Sainte Foy",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33763,12 +33712,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -33832,33 +33775,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Sidon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33892,12 +33842,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -33960,33 +33904,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "colons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Phéniciens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "colons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Phéniciens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des colons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des colons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Grecs.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -33996,12 +33947,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -34088,33 +34033,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1551 - 1553",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34154,12 +34106,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -34210,33 +34156,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1551 et 1553",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1551 et 1553",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1551 - 1553",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1551 - 1553",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "entre 1551 et 1553",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34252,12 +34205,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -34350,33 +34297,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le Tartare",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34392,12 +34346,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -34508,33 +34456,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jeunesse Galba",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jeunesse Galba",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Galba Galba",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Galba perd son père.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34544,12 +34499,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -34636,33 +34585,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-Baptiste fut arrêté.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -34702,12 +34658,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -34782,33 +34732,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Enfers",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les âmes des humains décédés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34824,12 +34781,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -34910,33 +34861,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Tyr",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -34970,12 +34928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -35038,33 +34990,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le déclin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le déclin s' installe et la ville fait l' objet de multiples combats et de guerres civiles",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le déclin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le déclin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "immigrations",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le déclin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le déclin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35092,12 +35051,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -35166,33 +35119,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fleuve Léthé",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Léthé",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'eau du fleuve Léthé",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35232,12 +35192,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -35288,33 +35242,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Léthé",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' eau du fleuve Léthé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'eau du fleuve Léthé",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35336,12 +35297,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -35416,42 +35371,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Aujourd'hui, on dit encore qu' elle est de « mauvaise humeur »",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Aujourd'hui",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "on dit encore",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Aujourd'hui,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Quatre éléments, quatre humeurs, quatre caractères selon la médecine antique.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "on",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on dit encore",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -35544,33 +35500,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Aqueducs Vitruve",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vitruve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35598,12 +35561,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -35669,33 +35626,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cette ville, située dans une grande plaine, est de forme carrée ; chacun de ses côtés a cent vingt stades de long, ce qui fait pour l' enceinte de la place quatre cent quatre-vingts",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de Babylone : « Cette ville, située dans une grande plaine, est de forme carrée ; chacun de ses côtés a cent vingt stades de long, ce qui fait pour l' enceinte de la place quatre cent quatre-vingt",
-                  "rougeL": 0.14814814814814814
+                  "rougeL": 0.14814814814814814,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cette ville, située dans une grande plaine, est de forme carrée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35711,12 +35675,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -35791,33 +35749,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cette ville, située dans une grande plaine, est de forme carrée ; chacun de ses côtés a cent vingt stades de long",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cette ville, située dans une grande plaine, est de forme carrée ; chacun de ses côtés a cent vingt stades de long, ce qui fait pour l' enceinte de la place quatre cent quatre-vingts stades",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cette ville, située dans une grande plaine, est de forme carrée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de forme carrée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35839,12 +35804,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -35919,33 +35878,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -35979,12 +35945,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -36041,33 +36001,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Metellus Scipion et Caton d' Utique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Metellus Scipion et Caton d'Utique",
-                  "rougeL": 0.9
+                  "rougeL": 0.9,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36101,12 +36068,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -36169,33 +36130,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "musée archéologique d' Héraklion et le musée national archéologique d' Athènes",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "entre le musée archéologique d' Héraklion et le musée national archéologique d' Athènes",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "entre le musée archéologique d' Héraklion et le musée national archéologique d' Athènes.",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36241,12 +36209,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -36292,33 +36254,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "musée archéologique d' Héraklion et le musée national archéologique d' Athènes",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Héraklion et Athènes.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cnossos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36346,12 +36315,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -36421,33 +36384,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grand Roi",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grand Roi",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36493,12 +36463,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -36543,33 +36507,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36609,12 +36580,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -36665,33 +36630,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le « Grand Roi »",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36737,12 +36709,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -36805,33 +36771,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Howard Carter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -36865,12 +36838,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -36933,33 +36900,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "premier record connu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le premier record connu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "95 pieds",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "premier record connu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "95 pieds",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le premier record connu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le premier record connu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37005,12 +36979,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -37062,33 +37030,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Darius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Darius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Darius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Darius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Darius aurait envoyé une lettre à Alexandre le Grand après la bataille d' Issos, qui demande le retrait des troupes d' Asie et la libération des prisonniers et selon Justin et Quinte en échange d' une rançon, mais Arrian ne fait aucune allusion à une rançon ; Quinte décrit que le ton de la lettre était offensant, mais ses demandes sont refusées par Alexandre.",
-                  "rougeL": 0.12
+                  "rougeL": 0.12,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Darius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Darius aurait envoyé une lettre.",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37116,12 +37091,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -37190,33 +37159,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37244,12 +37220,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -37312,33 +37282,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "commerciaux",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "commerciaux",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des comptoirs commerciaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37366,12 +37343,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -37440,33 +37411,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "argent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.16666666666666666
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "argent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.16666666666666666
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "argent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.16666666666666666
                 },
                 "Camembert_baseline": {
                   "answer_pred": "argent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.16666666666666666
                 },
                 "llama-2_lora": {
                   "answer_pred": "argent qu' il avait promis",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.16666666666666666
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' argent qu' il avait promis",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.16666666666666666
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'argent",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.16666666666666666
                 }
               },
               "human_annot": {
@@ -37496,32 +37474,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -37704,28 +37656,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "question": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Question douteuse"
-                  }
-                ],
                 "no_answer": [
                   {
                     "annot": "annot_3",
@@ -37788,33 +37718,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37848,12 +37785,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -37911,33 +37842,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une dizaine de kilos",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -37971,12 +37909,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -38040,33 +37972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comment ces maisons se présentaient et comment elles étaient construites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "comment ces maisons se présentaient et comment elles étaient construites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comment ces maisons se présenteraient et comment elles étaient construites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comment ces maisons se présentaient et comment elles étaient construites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Comment se présentait la maison principale ?",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comment ces maisons se présentaient et comment elles étaient construites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les archéologues ont compris comment ces maisons étaient construites.",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -38106,12 +38045,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -38169,33 +38102,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques en années auparavant",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques décennies auparavant.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38229,12 +38169,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -38292,33 +38226,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques en années auparavant",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques décennies auparavant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38352,12 +38293,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -38421,33 +38356,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au sud de l' île Tibérine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "île Tibérine",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au sud de l' île Tibérine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juste au sud de l' île Tibérine",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sud de l' île Tibérine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "juste au sud de l' île Tibérine",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au sud de l'île Tibérine",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38469,12 +38411,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -38549,33 +38485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38615,12 +38558,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -38677,33 +38614,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38737,12 +38681,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -38801,33 +38739,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -38867,12 +38812,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -38931,33 +38870,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains,",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour obtenir sa libération",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -39003,12 +38949,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -39053,42 +38993,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains,",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.6399999999999999
+                  "rougeL": 0.6399999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -39193,33 +39134,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bateaux de commerce fabriqués dans du bois de cèdre et rendus étanches grâce à un calfatage à l' aide bitume",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rendus étanches",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bateaux de commerce fabriqués dans du bois de cèdre et rendus étanches",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bateaux de commerce",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "bateaux de commerce fabriqués dans du bois de cèdre et rendus étanches grâce à un calfatage à l' aide bitume",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bateaux de commerce",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "du bois de cèdre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -39235,12 +39183,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -39333,33 +39275,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Nabuchodonosor II",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Nabuchodonosor II",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Empire babylonien",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Nabuchodonosor II",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "VIe siècle av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Babylone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Babylone.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -39375,12 +39324,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -39462,33 +39405,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Crésus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Crésus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Crésus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Crésus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Crésus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Crésus, le riche roi de Lydie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Crésus, le riche roi de Lydie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -39498,12 +39448,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -39593,33 +39537,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grec ancien.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -39659,12 +39610,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -39715,33 +39660,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en grec ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -39775,12 +39727,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -39843,33 +39789,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
                 "llama-2_lora": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.75
                 },
                 "GPT-3.5": {
                   "answer_pred": "les combats de gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.6
                 }
               },
               "human_annot": {
@@ -40041,48 +39994,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -40290,33 +40201,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "combats de gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "combats de gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "combats de gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les combats de gladiateurs et de bêtes sauvages.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -40362,12 +40280,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -40413,33 +40325,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bêtes sauvages",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -40479,12 +40398,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -40542,33 +40455,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un oppidum où s' était établie la tribu gauloise des Parisii depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un oppidum où s' était établie la tribu gauloise des Parisii depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un oppidum où s' était établie la tribu gauloise des Parisii depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "oppidum",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un oppidum",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un oppidum où s' était établie la tribu gauloise des Parisii depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il existait un oppidum où s' était établie la tribu gauloise des Parisii depuis le IIIe siècle av. J.-C.",
-                  "rougeL": 0.2727272727272727
+                  "rougeL": 0.2727272727272727,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -40590,12 +40510,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -40670,33 +40584,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "propréteur en Bétique",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il est nommé consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -40742,12 +40663,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -40792,33 +40707,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "consul",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "propréteur en Bétique",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "préteur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "préteur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -40828,12 +40750,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -40932,33 +40848,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -40980,12 +40903,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -41061,33 +40978,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "de l' affronter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de l' affronter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de l' affronter",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de l' affronter.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "affronter Attila",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "décide de l' affronter",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'affronter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -41097,12 +41021,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -41189,33 +41107,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "donne la vie à l' homme créé à partir de l' argile",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "donne la vie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "donne la vie à l' homme",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À donner la vie à l'homme.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41261,12 +41186,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -41311,33 +41230,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le sang donne la vie à l'homme.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41371,12 +41297,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -41439,33 +41359,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un nœud inextricable",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "roi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Trancher le nud gordien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "roi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le symbole de la royauté et de la divinité chez les Perses, les Étrusques, les Germains et les Illyriens.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il devient souverain.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41511,12 +41438,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -41561,33 +41482,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "roi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41621,12 +41549,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -41683,33 +41605,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "souverain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Trancher le nud gordien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Trancher le nœud gordien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "symbole de la royauté et de la divinité chez les Perses, les Étrusques, les Germains et les Illyriens.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "symbole de la royauté et de la divinité chez les Perses, les Étrusques, les Germains et les Illyriens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le symbole de la royauté et de la divinité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -41737,12 +41666,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -41811,33 +41734,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "grand pontife (chef de la religion nationale romaine)",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41847,12 +41777,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -41933,33 +41857,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -41993,12 +41924,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -42055,33 +41980,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il devient édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grand pontife",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il devient édile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il devient édile.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -42127,12 +42059,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -42183,33 +42109,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en 206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "206 av. J.-C.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "début de la Dynastie Han en 206.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -42243,12 +42176,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -42305,33 +42232,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "206 av. J.-C.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "206",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -42377,12 +42311,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -42433,33 +42361,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -42493,12 +42428,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -42555,33 +42484,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les Romains",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les Romains.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -42597,12 +42533,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -42683,33 +42613,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quatorze années de combats",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "632 - 651",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "après quatorze années de combats perdus contre les envahisseurs arabes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -42737,12 +42674,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -42808,42 +42739,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lors de la conquête musulmane de la Perse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lors de la conquête musulmane de la Perse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lors de la conquête musulmane de la Perse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lors de la conquête musulmane de la Perse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "632 - 651",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "651",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la conquête musulmane de la Perse",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -42939,33 +42871,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'agriculture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -42975,12 +42914,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -43067,33 +43000,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "flore et faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la flore et la faune.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43121,12 +43061,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -43189,33 +43123,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "flore et faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la flore et la faune.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43249,12 +43190,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -43317,33 +43252,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Circus Maximus contient 250 000 places.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le Circus Maximus.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43365,12 +43307,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -43439,33 +43375,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "toute la façade du Mont Palatin",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "toute la façade du Mont Palatin",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Circus Maximus contient 250000 places. Il occupe toute la façade du Mont Palatin.",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le circus Maximus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43493,12 +43436,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -43567,33 +43504,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "statuette",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une statuette en bronze et or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43627,12 +43571,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -43695,33 +43633,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Han",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43767,12 +43712,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -43817,33 +43756,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "début de la Dynastie Han",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Époque hellénistique",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Dynastie Han",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -43859,12 +43805,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -43945,33 +43885,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "peuples résidant en Gaule",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -43999,12 +43946,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -44067,33 +44008,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -44133,12 +44081,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -44195,33 +44137,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les villes de l' Empire romain,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rome et les villes de l' Empire romain",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -44243,12 +44192,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -44318,33 +44261,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "villes de l' Empire romain,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les villes de l' Empire romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -44384,12 +44334,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -44447,33 +44391,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son père adoptif",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "faire construire un théâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un autre terrain proche du temple d' Apollon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "faire construire un théâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -44507,12 +44458,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -44569,33 +44514,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un autre terrain proche du temple d' Apollon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le projet de son père adoptif.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -44629,12 +44581,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -44709,42 +44655,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi Cécrops",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le roi Cécrops.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -44838,33 +44785,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en Occident",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Occident",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Occident",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Occident",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rome",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Rome",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -44898,12 +44852,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -44979,33 +44927,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "empire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "empire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' empire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Empire perse achéménide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' empire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Empire perse achéménide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' Empire perse achéménide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45021,12 +44976,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -45107,33 +45056,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Constantinople",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Constantinople",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Constantinople",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "capitale de l' Empire romain.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Byzance fut une ville grecque avant de devenir, sous le nom de Constantinople, la capitale de l' Empire romain.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Constantinople",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "sous le nom de Constantinople la capitale de l' Empire romain.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45155,12 +45111,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -45230,33 +45180,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Empire romain",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Empire romain",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Empire romain",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Empire romain.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Empire romain",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": " Empire romain",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Byzance est devenue la capitale de l'Empire romain.",
-                  "rougeL": 0.36363636363636365
+                  "rougeL": 0.36363636363636365,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45278,12 +45235,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -45359,33 +45310,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "flore et faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45413,12 +45371,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -45481,33 +45433,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la flore et la faune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la flore et la faune.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45535,12 +45494,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -45609,33 +45562,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pour venger Pompée.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45663,12 +45623,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -45731,33 +45685,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45797,12 +45758,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -45859,33 +45814,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une nuée de flèches tirées par-dessus l' épaule",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une nuée de flèches tirées par-dessus l' épaule",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nuée de flèches tirées par-dessus l' épaule",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une nuée de flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une nuée de flèches tirées par-dessus l' épaule.",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une nuée de flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une nuée de flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -45901,12 +45863,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -45988,33 +45944,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des documents administratifs et des listes qui ne nous apprennent rien sur les événements historiques",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "difficiles à interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "difficiles à interpréter : ce sont des documents administratifs et des listes qui ne nous apprennent rien sur les événements historiques",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "difficiles à interpréter : ce sont des documents administratifs et des listes qui ne nous apprennent rien sur les événements historiques",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "difficiles à interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "difficultes à interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "difficiles à interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -46048,12 +46011,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -46112,33 +46069,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "documents administratifs et des listes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "documents administratifs et des listes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "écriture se développe, mais les textes écrits à cette époque sont difficiles à interpréter : ce sont des documents administratifs et des listes qui ne nous apprennent rien sur les événements historiques",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des documents administratifs et des listes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "écriture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "documents administratifs et des listes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de mention des médias utilisés à cette époque dans l'article.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -46172,12 +46136,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -46242,33 +46200,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46308,12 +46273,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -46368,33 +46327,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46422,12 +46388,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -46500,33 +46460,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vercingétorix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vercingétorix décide",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46554,12 +46521,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -46628,33 +46589,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46694,12 +46662,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -46750,33 +46712,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46798,12 +46767,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -46878,33 +46841,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains,",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5925925925925926
+                  "rougeL": 0.5925925925925926,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après un long siège, lors duquel la population gauloise d' Alésia est privée de nourriture à cause de l' encerclement fait par les Romains",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' encerclement fait par les Romains",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -46938,12 +46908,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -47006,33 +46970,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fabrication de la bière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fabrication de la bière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47048,12 +47019,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -47128,33 +47093,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la bière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47188,12 +47160,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -47268,42 +47234,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "jardins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "jardins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "jardins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ces jardins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "jardins suspendus",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jardins suspendus de Sémiramis",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les jardins suspendus",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -47396,33 +47363,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "venger Pompée.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47456,12 +47430,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -47524,33 +47492,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47584,12 +47559,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -47646,33 +47615,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "rassembler toutes les tribus gauloises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47706,12 +47682,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -47774,33 +47744,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "celle de la mythologie grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47840,12 +47817,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -47896,33 +47867,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grecque",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mythologie grecque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -47968,12 +47946,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -48036,33 +48008,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48096,12 +48075,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -48165,33 +48138,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grand Roi",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grand Roi",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48231,12 +48211,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -48287,33 +48261,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le « Grand Roi »",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi des Perses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48341,12 +48322,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -48427,33 +48402,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les ressources indispensables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -48481,12 +48463,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -48561,33 +48537,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "métaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "métaux indispensables",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48621,12 +48604,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -48698,33 +48675,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois saisons et une préquelle",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "série sur Spartacus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "série sur Spartacus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "série sur Spartacus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Spartacus. Fiction. Le personnage de Spartacus a été porté à l' écran, dans un péplum hollywoodien. Ce film a été réalisé par Stanley Kubrick. C' est l' acteur Kirk Douglas qui interprétait ce gladiateur rebelle. Steven S. DeKnight a aussi crée une série sur Spartacus. Elle contient 39 épisodes en trois saisons et une préquelle. Sources. https://fr.wikipedia.org/wiki/Spartacus_%28s%C3%A9rie_t%C3%A9l%C3%A9vis%C3%A9e%29. https://fr.wikipedia.org/wiki/Spartacus. http://www.cosmovisions.com/Spartacus.htm. http://www.larousse.fr/encycloped",
-                  "rougeL": 0.07874015748031496
+                  "rougeL": 0.07874015748031496,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une série sur Spartacus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La série sur Spartacus.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48758,12 +48742,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -48844,33 +48822,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Abbas Ibn Firnas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -48904,12 +48889,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -48984,42 +48963,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "femme étrusque",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -49124,42 +49104,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "titre des anciens empereurs allemands (Kaiser) et des anciens souverains de la Bulgarie (de 893 à 1422 et de 1908 à 1946) et de la Serbie (de 1346 à 1371) et de la Russi",
-                  "rougeL": 0.0625
+                  "rougeL": 0.0625,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les empereurs romains",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du titre des anciens empereurs allemands (Kaiser)",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du titre de « César »",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -49252,33 +49233,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mort du héro",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la mort du héro",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la mort du héro",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la mort du héro.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -49294,12 +49282,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -49374,33 +49356,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "héro",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "héro",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Après la mort du héros Héraclès.",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -49410,12 +49399,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -49502,33 +49485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "gladiateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "gladiateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gladiateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "gladiateurs et de bêtes sauvages",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gladiateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les combats de gladiateurs et de bêtes sauvages.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -49550,12 +49540,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -49630,42 +49614,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pompée et du Sénat romain",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pompée et du Sénat romain",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pompée et du Sénat romain",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pompée et du Sénat romain",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Second triumvirat",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rapprochement de Pompée et du Sénat romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le rapprochement de Pompée et du Sénat romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -49754,33 +49739,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ce triumvirat prend fin à la mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rapprochement de Pompée et du Sénat romain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mort de Crassus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -49826,12 +49818,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -49884,33 +49870,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le dieu Enki intervient à chaque fois pour sauver les hommes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "leurs créatures, les hommes deviennent trop nombreux et qu' ils font trop de bruits.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour faire disparaître les hommes",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les dieux considèrent que leurs créatures, les hommes deviennent trop nombreux et qu' ils font trop de bruits.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "faire périr l'humanité en la noyant",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -49926,12 +49919,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -50008,33 +49995,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "faire périr l' humanité en la noyant.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "périr l'humanité en la noyant",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "faire périr l' humanité en la noyant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "faire périr l'humanité en la noyant.",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50074,12 +50068,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -50138,33 +50126,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Egypte et en Mésopotamie (l' Irak)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "En Egypte et en Mésopotamie (l' Irak)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En Egypte et en Mésopotamie.",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50174,12 +50169,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -50260,33 +50249,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En Egypte et en Mésopotamie",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Egypte et en Mésopotamie (l' Irak)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Egypte et Mésopotamie (l'Irak)",
-                  "rougeL": 0.7368421052631577
+                  "rougeL": 0.7368421052631577,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En Egypte et en Mésopotamie (l' Irak)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50332,12 +50328,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -50388,42 +50378,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "62 av. J.-C.,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il est élu préteur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -50528,33 +50519,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "roi Hammourabi",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le roi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50576,12 +50574,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -50657,33 +50649,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une coalition des peuples résidant en Gaule",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50723,12 +50722,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -50785,33 +50778,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50845,12 +50845,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -50907,33 +50901,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rotation",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rotation",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rotation",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rotation",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mouvement de rotation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -50979,12 +50980,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -51035,33 +51030,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trancher le nud gordien",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trancher le nud gordien",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -51083,12 +51085,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -51157,33 +51153,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trancher le nud gordien",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trancher le nud gordien",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "trancher le nœud gordien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -51205,12 +51208,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -51297,33 +51294,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "province d' Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "province d' Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Galba Galba",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Othon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Galba",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Néron l'a envoyé en Taraconaise.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -51339,12 +51343,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -51425,33 +51423,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une horloge à eau, une machine à couper les pierres et une sorte de métronome.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "machine à voler, verres de lecture, horloge à eau, machine à couper les pierres, métronome, verre à quartz",
-                  "rougeL": 0.7826086956521738
+                  "rougeL": 0.7826086956521738,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Abbas Ibn Firnas a inventé des verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -51485,12 +51490,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -51547,33 +51546,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "verres de lecture, horloge à eau, machine à couper les pierres, métronome, verre à quartz",
-                  "rougeL": 0.8571428571428572
+                  "rougeL": 0.8571428571428572,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des verres de lecture, une horloge à eau, une machine à couper les pierres et une sorte de métronome, le verre à quartz.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -51583,12 +51589,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -51687,33 +51687,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les âmes des Justes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les âmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -51759,12 +51766,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -51816,33 +51817,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Anatolie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -51858,12 +51866,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -51945,33 +51947,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Veni vidi vici",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52011,12 +52020,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -52073,33 +52076,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le roi doit agir pour le bien de ses sujets. Il garantit la paix et la justice, pour cela il édicte des lois et dispose d' une armée. Il doit favoriser le bonheur matériel, en ordonnant les grands travaux nécessaires (surtout dans un pays dépendant de l' irrigation) ou en annulant les dettes.",
-                  "rougeL": 0.1111111111111111
+                  "rougeL": 0.1111111111111111,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "agit pour le bien de ses sujets",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le roi agit pour le bien de ses sujets.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52109,12 +52119,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -52195,33 +52199,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour le bien de ses sujets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52249,12 +52260,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -52323,33 +52328,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' agriculture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "plateaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une région de plateaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'agriculture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -52395,12 +52407,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -52451,42 +52457,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il choisit ses soldats et ne les achète pas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Il refuse de donner l' argent qu' il avait promis aux prétoriens en disant qu' il choisit ses soldats et ne les achète pas",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il choisit ses soldats et ne les achète pas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il choisit ses soldats et ne les achète pas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il les achète",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "qu' il choisit ses soldats et ne les achète pas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il choisit ses soldats et ne les achète pas.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -52580,33 +52587,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52646,12 +52660,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -52702,33 +52710,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vinius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52762,12 +52777,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -52830,33 +52839,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -52890,12 +52906,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -52952,33 +52962,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "héro",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Héraclès",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -53000,12 +53017,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -53092,33 +53103,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "titre des anciens souverains de la Bulgarie (de 893 à 1422 et de 1908 à 1946) et de la Serbie (de 1346 à 1371) et de la Russie (de 1472 à 1917)",
-                  "rougeL": 0.0689655172413793
+                  "rougeL": 0.0689655172413793,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les empereurs romains",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "César",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le titre des anciens empereurs allemands (Kaiser)",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -53158,12 +53176,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -53226,42 +53238,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "titre des anciens souverains de la Bulgarie (de 893 à 1422 et de 1908 à 1946) de la Serbie (de 1346 à 1371) et de la Russie (de 1472 à 1917)",
-                  "rougeL": 0.0689655172413793
+                  "rougeL": 0.0689655172413793,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les empereurs romains portèrent le titre de « César »",
-                  "rougeL": 0.631578947368421
+                  "rougeL": 0.631578947368421,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(Kaiser)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "César",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le titre des anciens empereurs allemands est à l'origine du titre des empereurs romains qui portaient le titre de « César ».",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -53354,33 +53367,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Tigellin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -53414,12 +53434,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -53483,33 +53497,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trouver une solution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trouver une solution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de trouver une solution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de trouver une solution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de trouver une solution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Elle demande à Enki de trouver une solution.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -53537,12 +53558,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -53605,33 +53620,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son fils ou son petit-fils",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une solution",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une solution.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -53665,12 +53687,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -53751,33 +53767,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son verdict",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "verdict",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son verdict",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' énergie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "s'il lève le pouce, le blessé a la vie sauve. s' il l' abaisse, c' est la mort.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son verdict",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son verdict.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -53799,12 +53822,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -53879,42 +53896,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jules César",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -54019,33 +54037,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rideau principal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aulaeum",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aulaeum",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rideaux de scène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "aulaeum",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Aulaeum",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le rideau principal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54085,12 +54110,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -54147,33 +54166,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "humeurs",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "humeurs",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les humeurs",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54207,12 +54233,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -54269,33 +54289,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quatre fluides",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54335,12 +54362,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -54397,33 +54418,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "avant la construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au cours des années 1990",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au cours des années 1990, juste avant la construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juste avant la construction d' un grand barrage sur l' Euphrate,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "au cours des années 1990",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au cours des années 1990",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "juste avant la construction d'un grand barrage sur l'Euphrate",
-                  "rougeL": 0.631578947368421
+                  "rougeL": 0.631578947368421,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54433,12 +54461,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -54520,33 +54542,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "construction d' un grand barrage sur l' Euphrate,",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la construction d' un grand barrage sur l' Euphrate",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la construction d'un grand barrage sur l'Euphrate",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54580,12 +54609,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -54661,33 +54684,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ils sont valets d' armes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Spartiates",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les citoyens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' hilote",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Spartiates",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Il est très exceptionnel qu' on leur demande de combattre (cela se fera pendant la guerre du Péloponnèse qui oppose Sparte à Athènes dans la seconde moitié du Ve siècle av. J.-C.)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on leur demande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -54715,12 +54745,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -54789,33 +54813,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jésus partit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une colombe descendit du ciel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Plus tard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-Baptiste fut arrêté.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -54855,12 +54886,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -54917,33 +54942,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la mort du héro.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -54977,12 +55009,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -55045,33 +55071,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une des épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une des épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une des épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une épreuve des Jeux olympiques",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' une des épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une des épreuves des Jeux olympiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55105,12 +55138,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -55173,33 +55200,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un morceau de bois d' environ 1,60 m.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55245,12 +55279,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -55295,33 +55323,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un morceau de bois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un morceau de bois d' environ 1,60 m.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55361,12 +55396,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -55423,33 +55452,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "encercle sur un oppidum (un camp militaire sur colline), à Alésia",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armées de Jules César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une victoire importante",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une victoire importante contre les Romains lors de la bataille de Gergovie",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bataille de Gergovie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vercingétorix perd contre les Romains.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -55477,12 +55513,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -55553,33 +55583,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hammurabi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "époque d'Hammurabi",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55607,12 +55644,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -55675,33 +55706,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de l' époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "d' Hammurabi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "époque d' Hammurabi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "époque d'Hammurabi",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55741,12 +55779,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -55803,33 +55835,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les « îles Cassitérides »",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "aux « îles Cassitérides » (probablement les îles Scilly au large de la Cornouaille britannique)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -55869,12 +55908,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -55922,33 +55955,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cassitérides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 },
                 "GPT-3.5": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -56162,48 +56202,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -56365,33 +56363,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les « îles Cassitérides »",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les « îles Cassitérides",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les îles Cassitérides (probablement les îles Scilly au large de la Cornouaille britannique)",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -56437,12 +56442,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -56502,33 +56501,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fortes variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "très fortes pluies et de sécheresses prolongées",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "alternance de très fortes pluies et de sécheresses prolongées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "changements climatiques. À cette époque, les sédiments montrent une alternance de très fortes pluies et de sécheresses prolongées.",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des variations climatiques",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "forces variations",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "par des variations climatiques",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -56544,12 +56550,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -56630,33 +56630,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "3500 ans avant Jésus-Christ",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "3500 ans avant Jésus-Christ",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "3500 ans avant Jésus-Christ,",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -56672,12 +56679,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -56753,33 +56754,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "3500 ans avant Jésus-Christ",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "3500 ans avant Jésus-Christ",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "3500 ans avant Jésus-Christ",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "3500 ans avant Jésus-Christ,",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "environ 3500 ans avant Jésus-Christ",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -56813,12 +56821,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -56894,33 +56896,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' alphabet étrusque est dérivé de l' alphabet grec",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "alphabet étrusque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' alphabet étrusque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' alphabet étrusque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' alphabet grec",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l'alphabet étrusque",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'alphabet étrusque",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -56930,12 +56939,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -57035,33 +57038,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "arme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "arme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "arme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' arme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le javelot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' arme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'arme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -57107,12 +57117,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -57175,33 +57179,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57217,12 +57228,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -57312,33 +57317,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les cavaliers parthes l' attaquent grâce à une nuée de flèches tirées par-dessus l' épaule",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "parthes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "parthes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57384,12 +57396,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -57443,33 +57449,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57503,12 +57516,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -57565,33 +57572,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "venger Pompée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "venger Pompée.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57625,12 +57639,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -57693,33 +57701,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57753,12 +57768,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -57815,33 +57824,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jules César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57875,12 +57891,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -57943,33 +57953,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -57979,12 +57996,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -58065,33 +58076,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une bibliothèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58101,12 +58119,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -58193,33 +58205,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comme un oiseau afin de battre des ailes",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comme un oiseau afin de battre des ailes",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58265,12 +58284,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -58316,33 +58329,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des ailes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bouger ses bras comme un oiseau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bouger",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des ailes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comme un oiseau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "comme un oiseau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58352,12 +58372,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -58445,33 +58459,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la même date que la mort de Crassus en 53 av. J.C",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à la même date",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -58505,12 +58526,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -58568,33 +58583,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 53 av. J.C",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58622,12 +58644,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -58697,33 +58713,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Auguste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58757,12 +58780,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -58837,33 +58854,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sant Martí d' Empúries",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paléopolis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ancien îlot est en totalité occupé par le village de Sant Mart d' Empries",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La Paléopolis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Paléopolis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ancien îlot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Sant Martí d' Empúries",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -58909,12 +58933,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -58966,33 +58984,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un passages latéraux couverts",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux passages latéraux couverts (un aditus maximus)",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59032,12 +59057,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -59088,33 +59107,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux passages latéraux couverts",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "on utilise deux passages latéraux couverts",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "par deux passages latéraux couverts (un aditus maximus) situés de part et d'autre de l' orchestra.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux passages latéraux couverts (un aditus maximus) situés de part et d' autre de l' orchestra",
-                  "rougeL": 0.4210526315789474
+                  "rougeL": 0.4210526315789474,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on utilise deux passages latéraux couverts.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -59136,12 +59162,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -59228,33 +59248,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Nymphidius Sabinus",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Nymphidius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Nymphidius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Nymphidius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Nymphidius Sabinus",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Nymphidius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Nymphidius Sabinus",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59276,12 +59303,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -59356,33 +59377,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un caractère monumental",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59416,12 +59444,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -59478,33 +59500,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "monumental",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59544,12 +59573,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -59606,33 +59629,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour ses sujets",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59660,12 +59690,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -59734,33 +59758,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans le courant du - IIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "- IIe siècle",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "- IIe siècle",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "courant du - IIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "- IIe siècle",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "- IIe siècle",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le courant du - IIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59800,12 +59831,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -59862,33 +59887,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cassitérides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les \"îles Cassitérides\" (probablement les îles Scilly au large de la Cornouaille britannique).",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -59928,12 +59960,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -59987,42 +60013,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Péloponnèse. Ils ont dû quitter le Péloponnèse",
-                  "rougeL": 0.5882352941176471
+                  "rougeL": 0.5882352941176471,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ils ont dû quitter le Péloponnèse après la mort du héros.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -60109,33 +60136,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Péloponnèse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60175,12 +60209,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -60237,33 +60265,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les « îles Cassitérides »",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "océan Atlantique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les « îles Cassitérides » (probablement les îles Scilly au large de la Cornouaille britannique)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60297,12 +60332,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -60356,33 +60385,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cassitérides",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les « îles Cassitérides",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "îles Cassitérides",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les \"îles Cassitérides\" (probablement les îles Scilly au large de la Cornouaille britannique)",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60410,12 +60446,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -60481,33 +60511,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Arthur Evans (1851 - 1941)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' archéologue britannique Arthur Evans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'archéologue britannique Arthur Evans",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60547,12 +60584,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -60603,33 +60634,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Arthur Evans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60669,12 +60707,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -60731,33 +60763,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la mort du héro",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60791,12 +60830,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -60871,33 +60904,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -60925,12 +60965,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -61008,33 +61042,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les cavaliers",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les cavaliers parthes l' attaquent grâce à une nuée de flèches tirées par-dessus l' épaule",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "parthes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cavaliers parthes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61044,12 +61085,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -61139,33 +61174,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "On a longtemps pensé que les vestiges",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "archéologue",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On n'a pas continué les fouilles de l'Odéon.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -61187,12 +61229,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -61267,33 +61303,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son père adoptif",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jules César envisage de faire construire un théâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "faire construire un théâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le projet de son père adoptif",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61333,12 +61376,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -61407,33 +61444,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "métaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les ressources indispensables.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61473,12 +61517,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -61535,33 +61573,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rome.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61595,12 +61640,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -61658,33 +61697,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Galba est arrivé à Rome.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61718,12 +61764,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -61792,33 +61832,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la maison de Muzalar",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "maison de Muzalar",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "site dit de la maison de Muzalar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "site dit de la maison de Muzalar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les fouilles du site dit de la maison de Muzalar",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "site dit de la maison de Muzalar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "site dit de la maison de Muzalar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61840,12 +61887,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -61920,33 +61961,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dans la grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -61986,12 +62034,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -62043,33 +62085,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Denisova",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Denisova",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Denisova",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la grotte de Denisova",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -62109,12 +62158,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -62172,33 +62215,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -62232,12 +62282,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -62296,33 +62340,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -62362,12 +62413,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -62426,33 +62471,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "50300 ans (à 2200 ans près)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"datant de 50300 ans\".",
-                  "rougeL": 0.39999999999999997
+                  "rougeL": 0.39999999999999997,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -62486,12 +62538,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -62548,33 +62594,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "50300 ans",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "50300 ans (à 2200 ans près)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "datant de 50300 ans",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -62620,12 +62673,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -62676,33 +62723,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "presque toujours",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "presque toujours",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "toujours",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "presque toujours",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis les sociétés agricoles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Testart propose de considérer la piste de l' ostentation, nous faisant découvrir par là que la richesse n' est pas l' apanage des sociétés agricoles.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la richesse existe presque toujours",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -62718,12 +62772,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -62804,33 +62852,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' empire romain)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Alexandre Sévère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -62852,12 +62907,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -62929,33 +62978,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Alexandre Sévère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -62965,12 +63021,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -63054,33 +63104,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chasseurs - cueilleurs en voie de sédentarisation",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des chasseurs - cueilleurs en voie de sédentarisation",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "se fixer dans cette plaine froide et aride entourée de montagnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de se fixer dans cette plaine froide et aride entourée de montagnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chasseurs - cueilleurs en voie de sédentarisation",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "se fixer dans cette plaine froide et aride entourée de montagnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "se fixer",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -63090,12 +63147,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -63182,33 +63233,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "connaissance, roule contre un éboulis et meurt",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "connaissance, roule contre un éboulis et meurt",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le tigre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conscience",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "conscience",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -63236,12 +63294,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -63310,33 +63362,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "culture, l' économie et la vie des Européens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le passage des populations préhistoriques de la prédation à la production a influencé la culture, l'économie et la vie des Européens",
-                  "rougeL": 0.31578947368421056
+                  "rougeL": 0.31578947368421056,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -63358,12 +63417,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -63432,33 +63485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "culture, l' économie et la vie des Européens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "culture, l' économie et la vie des Européens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la culture, l'économie et la vie des Européens",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -63504,12 +63564,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -63560,33 +63614,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fondation britannique Leverhulme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fondation britannique Leverhulme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la fondation britannique Leverhulme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -63602,12 +63663,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -63683,33 +63738,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Thibaut Devièse",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -63743,12 +63805,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -63824,42 +63880,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ils ont ainsi pu chasser une plus grande variété d' animaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "membres de la lignée humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "membres de la lignée humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "membres de la lignée humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les membres de la lignée humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ils",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les membres de la lignée humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -63953,33 +64010,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sa famille, qui est aisée",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "sa famille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -64019,12 +64083,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -64087,33 +64145,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1630.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à partir de 1630",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -64153,12 +64218,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -64209,33 +64268,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1630.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1630",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à partir de 1630",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à partir de 1630",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -64275,12 +64341,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -64349,33 +64409,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il perd connaissance, roule contre un éboulis et meurt",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Australopithèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Australopithèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Australopithèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Australopithèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Australopithèque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Australopithèque",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -64409,12 +64476,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -64477,33 +64538,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au milieu du ve siècle avant notre ère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une partie des Samnites ont quitté l' Italie centrale pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "parce qu' au milieu du ve siècle avant notre ère, une partie des Samnites ont quitté l' Italie centrale pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 0.34782608695652173
+                  "rougeL": 0.34782608695652173,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour venir s'installer dans le Sud de la Botte.",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -64525,12 +64593,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -64599,33 +64661,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au milieu du ve siècle avant notre ère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une partie des Samnites ont quitté l' Italie centrale pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour venir s'installer dans le Sud de la Botte",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour venir s' installer dans le Sud de la Botte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour venir s'installer dans le Sud de la Botte",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -64659,12 +64728,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -64727,33 +64790,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "carbone à l'état de trace",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -64781,12 +64851,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -64855,33 +64919,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "poissons et coquillages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "protéines animales",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "poissons et coquillages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(poissons et coquillages compris)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "poissons et coquillages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poissons et coquillages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les restes laissés par les grands carnivores",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -64897,12 +64968,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -64983,33 +65048,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tout le contraire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Tout le contraire.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65037,12 +65109,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -65108,33 +65174,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les enfants",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65162,12 +65235,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -65236,42 +65303,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "59,3 tonnes,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "59.3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -65358,42 +65426,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "59,3",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "59,3",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "59,3 tonnes,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "59.3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "59,3 tonnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -65486,33 +65555,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trop d' énergie et de nourriture pour sa mère qui l' élève seule",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trop d' énergie et de nourriture pour sa mère qui l' élève seule",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trop d' énergie et de nourriture pour sa mère qui l' élève seule",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "trop d'énergie et de nourriture",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65528,12 +65604,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -65614,33 +65684,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65662,12 +65739,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -65737,33 +65808,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65809,12 +65887,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -65866,33 +65938,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "autres membres du clan",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les autres membres du clan",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les chasseurs tuent les animaux.",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -65920,12 +65999,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -65988,33 +66061,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à piéger, à tuer ou à charogner de gros animaux",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "piéger, à tuer ou à charogner de gros animaux",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "gros animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66036,12 +66116,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -66116,42 +66190,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les femmes et les vieillards",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -66246,33 +66321,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66282,12 +66364,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -66389,33 +66465,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "maisons en terre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les premières maisons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "premières maisons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "premières maisons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "maisons en terre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les premières maisons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les premières maisons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66443,12 +66526,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -66518,33 +66595,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "restes d' animaux",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des lances en bois et des restes d'animaux",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66584,12 +66668,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -66642,33 +66720,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "lances en bois et des restes d' animaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des lances en bois et des restes d'animaux",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66678,12 +66763,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -66772,33 +66851,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "troisième",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le troisième vase carolingien",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66838,12 +66924,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -66901,33 +66981,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour peupler l'Asie du Sud-Est et l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -66961,12 +67048,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -67023,33 +67104,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour peupler l'Asie du Sud-Est et l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour peupler l'Asie du Sud-Est et l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67059,12 +67147,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -67151,33 +67233,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus haut dans la vallée de l' Hérault",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vallée de l' Hérault",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus haut dans la vallée de l' Hérault",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vallée de l' Hérault,",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "val de Gellone",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un lieu entouré de spectaculaires falaises et surmonté d' un château fort perché sur un piton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le val de Gellone",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67217,12 +67306,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -67279,33 +67362,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Grotte Chauvet",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Europe, en Espagne, en Ardèche (Grotte Chauvet), sur l'île de Sulewesi",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67315,12 +67405,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -67401,33 +67485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ardèche",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En France (Ardèche) et en Espagne.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67437,12 +67528,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -67529,33 +67614,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67595,12 +67687,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -67653,33 +67739,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moléculaires",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des marqueurs moléculaires fabriqués à partir d' ADN mitochondrial humain",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "marqueurs moléculaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des marqueurs moléculaires fabriqués à partir d'ADN mitochondrial humain.",
-                  "rougeL": 0.4210526315789474
+                  "rougeL": 0.4210526315789474,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -67725,12 +67818,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -67783,33 +67870,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67837,12 +67931,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -67905,33 +67993,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -67977,12 +68072,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -68027,33 +68116,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tambo",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le projet européen Tambo.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -68081,12 +68177,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -68155,42 +68245,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "différemment",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "différemment",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "différemment",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "différemment",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ils agiront différemment",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les mêmes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ils agiront différemment.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -68283,33 +68374,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par l' archerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "expression ostentatoire du statut",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans l' expression ostentatoire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -68343,12 +68441,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -68405,33 +68497,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ostentatoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' expression ostentatoire de leur statut",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans l' expression ostentatoire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -68471,12 +68570,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -68533,33 +68626,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une analyse de l'os coxal.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -68773,48 +68873,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -68979,33 +69037,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "os coxal",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "os coxal",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de l' os coxal",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "analyse de l' os coxal",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une analyse de l'os coxal",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69033,12 +69098,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -69107,33 +69166,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits thermes privés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "thermes privés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits thermes privés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "petits thermes privés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des thermes privés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petits thermes privés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les petits thermes privés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69167,12 +69233,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -69235,33 +69295,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les doigts de sa main droite se crispent autour du pouce",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "connaissance",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conscience",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le australopithèque a perdu connaissance",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69307,12 +69374,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -69363,33 +69424,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "notre devoir d' en assumer la responsabilité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Nous pouvons faire des choix concernant notre comportement.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "nous pouvons faire des choix concernant notre comportement",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "nous, Homo sapiens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69399,12 +69467,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -69505,33 +69567,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ces somptueux textiles servaient à envelopper les corps de défunts notables",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ces somptueux textiles servaient à envelopper les corps de défunts notables",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "textiles",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sommptueux textiles",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69559,12 +69628,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -69643,33 +69706,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ces somptueux textiles servaient à envelopper les corps de défunts notables",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ces somptueux textiles servaient à envelopper les corps de défunts notables",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ces somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "somptueux textiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69685,12 +69755,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -69775,33 +69839,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Guillaume",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69835,12 +69906,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -69903,33 +69968,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Université de Wroclaw",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projet européen Tambo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Józef Szykulski dirige le projet européen Tambo.",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -69957,12 +70029,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -70031,33 +70097,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sémite d' éleveurs nomades",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "éleveurs nomades",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "éleveurs nomades",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "peuple sémite d' éleveurs nomades,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "amorrite",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un peuple sémite d' éleveurs nomades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les éleveurs nomades",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70091,12 +70164,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -70153,33 +70220,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "peuple sémite d' éleveurs nomades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "éleveurs nomades",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "éleveurs nomades",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "peuple sémite d' éleveurs nomades,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un peuple sémite d' éleveurs nomades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un peuple sémite d'éleveurs nomades",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un peuple sémite d' éleveurs nomades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70195,12 +70269,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -70281,33 +70349,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sous l' eau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "eaux profondes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' eau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sous l'eau.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70347,12 +70422,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -70403,33 +70472,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sous l' eau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sous l'eau",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sous l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sous l'eau.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70463,12 +70539,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -70531,33 +70601,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "environ 100 km de diamètre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une météorite",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70597,12 +70674,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -70659,33 +70730,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une partie des Samnites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70719,12 +70797,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -70781,33 +70853,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Samnites",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70817,12 +70896,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -70909,33 +70982,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "S. Venault et ses collègues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -70975,12 +71055,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -71039,33 +71113,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "génome néandertalien moyen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un génome néandertalien moyen obtenu à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un génome néandertalien moyen obtenu à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un génome néandertalien moyen obtenu à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un génome néandertalien moyen obtenu à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un génome néandertalien moyen obtenu à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un génome néandertalien moyen obtenue à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -71105,12 +71186,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -71162,33 +71237,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "génome néandertalien moyen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "néandertalien moyen",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "néandertalien moyen",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "génome néandertalien moyen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un génome néandertalien moyen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "génome néandertalien moyen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un génome néandertalien moyen obtenue à partir de plusieurs individus",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -71234,12 +71316,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -71303,42 +71379,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la consommation d' aliments caloriques a favorisé la croissance du cerveau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le basculement de nos ancêtres vers une alimentation carnée a favorisé la croissance du cerveau",
-                  "rougeL": 0.7777777777777778
+                  "rougeL": 0.7777777777777778,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "basculement de nos ancêtres vers une alimentation carnée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le basculement de nos ancêtres vers une alimentation carnée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la consommation d' aliments caloriques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "consommation d' aliments caloriques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la consommation d'aliments caloriques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -71431,33 +71508,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -71491,12 +71575,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -71554,33 +71632,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "anciens colorants andins",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -71626,12 +71711,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -71683,33 +71762,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "oscillations climatiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "oscillations climatiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' amplitude des oscillations a augmenté depuis environ 1,4 million d' années",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus nos connaissances sur ces oscillations climatiques progressent, plus nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.6896551724137931
+                  "rougeL": 0.6896551724137931,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "nous réalisons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -71755,12 +71841,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -71805,33 +71885,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "instables",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ont dû être instables",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les anciens environnements de nos ancêtres ont dû être instables.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -71841,12 +71928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -71933,33 +72014,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "endroits qu' elle nous aurait conduits à désigner comme prometteurs",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "endroits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sites ayant produit des découvertes importantes, tel Tim's Confession",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "endroits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sites ayant produit des découvertes importantes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "signatures spectrales",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les sites ayant produit des découvertes importantes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72005,12 +72093,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -72062,33 +72144,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72116,12 +72205,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -72184,33 +72267,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72256,12 +72346,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -72312,33 +72396,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "coévolution des gènes et de la culture",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sous le nom de coévolution des gènes et de la culture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72366,12 +72457,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -72440,33 +72525,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Quel est le nombre d'années ?",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables.",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72494,12 +72586,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -72568,33 +72654,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -72628,12 +72721,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -72691,33 +72778,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur le fond sédimentaire du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72751,12 +72845,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -72820,33 +72908,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -72886,12 +72981,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -72943,33 +73032,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils en pierre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en pierre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "outils primitifs en pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Des outils primitifs en pierre.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73015,12 +73111,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -73072,33 +73162,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73108,12 +73205,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -73194,33 +73285,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "laitière",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "laitière",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' exploitation laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73230,12 +73328,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -73334,33 +73426,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Scandinavie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73388,12 +73487,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -73462,33 +73555,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73522,12 +73622,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -73584,33 +73678,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matériel génétique néandertalien",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1 à 4 % de matériel génétique néandertalien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -73644,12 +73745,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -73712,33 +73807,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Totolapa au Chiapas",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "formation de Totolapa",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Totolapa",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "formation de Totolapa",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -73748,12 +73850,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -73834,33 +73930,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans la formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la formation de Totolapa au Chiapas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au Chiapas",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -73894,12 +73997,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -73962,33 +74059,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "étudier les anciens colorants andins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "étudier les anciens colorants andins.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74022,12 +74126,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -74091,33 +74189,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tombes néolithiques d' une vaste zone allant de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tombes néolithiques d' une vaste zone allant de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74163,12 +74268,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -74216,33 +74315,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tombes néolithiques d' une vaste zone allant de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombes néolithiques d' une vaste zone allant de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tombes néolithiques d' une vaste zone allant de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "néolithiques",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la vallée du Rhône à la Slovaquie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les tombes néolithiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -74276,12 +74382,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -74347,33 +74447,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des techniques",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de nouvelles techniques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74401,12 +74508,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -74475,33 +74576,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "notre comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "notre comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "nous pouvons faire des choix concernant notre comportement",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "notre comportement",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74517,12 +74625,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -74604,33 +74706,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les produits non disponibles sur place",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les produits non disponibles sur place",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les produits non disponibles sur place",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les produits non disponibles sur place",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "produits non disponibles sur place",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "produits non disponibles sur place, tel du silex de bonne qualité pour confectionner des outils",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du silex de bonne qualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74670,12 +74779,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -74726,33 +74829,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "silex de bonne qualité pour confectionner des outils",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "silex de bonne qualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "silex de bonne qualité pour confectionner des outils",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "silex de bonne qualité pour confectionner des outils",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "du silex de bonne qualité pour confectionner des outils",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du silex de bonne qualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "du silex de bonne qualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -74786,12 +74896,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -74854,33 +74958,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -74926,12 +75037,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -74982,33 +75087,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Des aperçus de quelques sites à fossiles clefs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Des aperçus de quelques sites à fossiles clefs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Des aperçus de quelques sites à fossiles clefs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Des aperçus de quelques sites à fossiles clefs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des sites à fossiles clefs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques sites à fossiles clefs",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques sites à fossiles clefs",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75054,12 +75166,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -75110,33 +75216,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Italie centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'Italie centrale",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75170,12 +75283,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -75238,33 +75345,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "haplotypes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "haplotypes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "haplotypes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "haplotypes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "humains actuels",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "haplotypes (groupes de gènes transmis ensemble)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les haplotypes provenant des Néandertaliens",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75310,12 +75424,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -75369,33 +75477,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1991",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75423,12 +75538,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -75497,33 +75606,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "matières organiques",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matières organiques",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à partir de matières organiques",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matières organiques, tels le bois ou la peau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à partir de matières organiques, tels le bois ou la peau.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matières organiques, tels le bois ou la peau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à partir de matières organiques, tels le bois ou la peau.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75533,12 +75649,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -75622,33 +75732,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bois ou la peau",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matières organiques",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matières organiques, tels le bois ou la peau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bois ou la peau",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "matières organiques, tels le bois ou la peau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bois ou la peau",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bois ou la peau",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -75694,12 +75811,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -75753,33 +75864,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits.",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -75789,12 +75907,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -75875,33 +75987,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "bijoux égarés ou des effets militaires abandonnés lors des derniers conflits",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -75935,12 +76054,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -76003,33 +76116,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "systèmes militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la vie sociale par des systèmes militaires et religieux",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76075,12 +76195,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -76125,33 +76239,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "par des systèmes militaires (pour se défendre des concurrents) et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les systèmes militaires et religieux.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -76167,12 +76288,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -76253,33 +76368,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "primates, fossiles ou modernes",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les primates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76319,12 +76441,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -76384,33 +76500,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Amérique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Amérique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Amérique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Amériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Eurasiatiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Amérique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Amérique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76444,12 +76567,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -76512,33 +76629,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans le IIe millénaire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76578,12 +76702,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -76635,33 +76753,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "IIe millénaire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "IIe millénaire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "IIe millénaire",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "début du IIe millénaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76701,12 +76826,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -76764,33 +76883,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "23 sites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "23 sites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "23 sites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "résidus laitiers dans plus de 2200 tessons de poteries",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "23 sites datés entre le VIIe et le Ve millénaire",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les poteries provenant de 23 sites",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les sites du Nord-Ouest de l' Anatolie",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -76818,12 +76944,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -76892,33 +77012,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "réaliser une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "réaliser une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "R. Clarke demande à Laurent Bruxelles de réaliser une analyse stratigraphique poussée du lit de mort de Little Foot.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -76958,12 +77085,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -77014,33 +77135,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une analyse stratigraphique poussée du lit de mort de Little Foot",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77068,12 +77196,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -77142,33 +77264,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77202,12 +77331,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -77264,33 +77387,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' au-delà",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' au-delà",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77300,12 +77430,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -77392,33 +77516,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "archéologues de l' INRAP",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-François Pasty dirige l'équipe d'archéologues de l'INRAP.",
-                  "rougeL": 0.11111111111111112
+                  "rougeL": 0.11111111111111112,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77458,12 +77589,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -77515,33 +77640,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "archéologues de l' INRAP",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe d' archéologues de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'équipe d'archéologues de l'INRAP",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77551,12 +77683,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -77656,42 +77782,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petites populations isolées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les pressions environnementales et la culture matérielle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les pressions environnementales et la culture matérielle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les pressions environnementales et la culture matérielle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les pressions environnementales et la culture matérielle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pressions environnementales et la culture matérielle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les ingrédients agissent différemment.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -77784,33 +77911,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "daims et bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les daims et les bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77856,12 +77990,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -77906,33 +78034,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "daims et bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "daims et de bisons",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les loups tuent les daims et les bisons.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -77960,12 +78095,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -78034,42 +78163,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "systèmes militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -78156,33 +78286,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes) et religieux (pour maintenir la cohésion des groupes) et religieux (pour maintenir la cohésion",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78210,12 +78347,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -78284,33 +78415,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armoise et autres hautes herbes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "armoise et autres hautes herbes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78344,12 +78482,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -78406,33 +78538,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armoise et autres hautes herbes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "armoise et autres hautes herbes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "arbrisseaux d' armoise et autres hautes herbes",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les arbrisseaux d'armoise et autres hautes herbes",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78454,12 +78593,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -78534,33 +78667,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "via l' ADN",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ADN récupéré d' un fragment d' os de doigt",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par l' ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne.",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ADN récupéré d' un fragment d' os de doigt",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "via l'ADN récupéré d'un fragment d'os de doigt",
-                  "rougeL": 0.38461538461538464
+                  "rougeL": 0.38461538461538464,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78570,12 +78710,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -78656,33 +78790,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "via l' ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "via l' ADN récupéré d' un fragment d' os de doigt",
-                  "rougeL": 0.6086956521739131
+                  "rougeL": 0.6086956521739131,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "via l' ADN récupéré d' un fragment d' os de doigt",
-                  "rougeL": 0.6086956521739131
+                  "rougeL": 0.6086956521739131,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "via l' ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ADN récupéré d' un fragment d' os de doigt datant de 40000 ans et trouvé dans une grotte sibérienne",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "via l'ADN récupéré d'un fragment d'os de doigt",
-                  "rougeL": 0.38461538461538464
+                  "rougeL": 0.38461538461538464,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78728,12 +78869,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -78784,33 +78919,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "véritable fouille subaquatique méthodique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une véritable fouille subaquatique méthodique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une véritable fouille subaquatique méthodique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fouille subaquatique méthodique",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une fouille subaquatique méthodique",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "réalisé une véritable fouille subaquatique méthodique",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "réalisé une véritable fouille subaquatique méthodique.",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78838,12 +78980,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -78906,33 +79042,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "subaquatique méthodique",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "subaquatique méthodique",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fouille subaquatique méthodique",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fouille subaquatique méthodique",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "subaquatique méthodique",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fouille subaquatique méthodique",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une véritable fouille subaquatique méthodique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -78978,12 +79121,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -79034,42 +79171,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "oppidum",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "oppidum",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "oppidum",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "oppidum",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "forteresse gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "oppidum (forteresse gauloise)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On utilisait la forteresse au mont Cavalier à l'âge du Fer.",
-                  "rougeL": 0.15384615384615383
+                  "rougeL": 0.15384615384615383,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -79164,42 +79302,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sépulture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -79286,33 +79425,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Kébara en Israël",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kébara en Isral",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Kébara en Isral",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Kébara en Israël",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kébara en Israël",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une sépulture contenant un squelette authentiquement néandertalien",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Kébara en Israël",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -79328,12 +79474,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -79414,33 +79554,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les reptiles mammaliens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -79456,12 +79603,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -79539,33 +79680,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "j' en aie le cœur net",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "j' en aie le cur net",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "j' en aie le cur net",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ch. Emerson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le narrateur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "je décide",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Je",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -79599,12 +79747,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -79679,33 +79821,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -79745,12 +79894,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -79813,33 +79956,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les membres du groupe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -79873,12 +80023,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -79953,42 +80097,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' atmosphère vierge de toute pollution lumineuse du Great Divide Basin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'atmosphère vierge de toute pollution lumineuse du Great Divide Basin.",
-                  "rougeL": 0.8571428571428572
+                  "rougeL": 0.8571428571428572,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -80093,33 +80238,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "gènes néandertaliens",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "gènes néandertaliens",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gènes participant à la création des tissus des testicules",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -80129,12 +80281,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -80221,33 +80367,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bâtisseurs de cathédrales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80293,12 +80446,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -80350,33 +80497,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80410,12 +80564,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -80472,33 +80620,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "jour de la mort de leur maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le jour de la mort de leur maître.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80508,12 +80663,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -80600,33 +80749,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80654,12 +80810,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -80723,33 +80873,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Moselle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80789,12 +80946,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -80852,33 +81003,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -80918,12 +81076,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -80980,33 +81132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mâchoires et de ses dents",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mâchoires et de ses dents de grandes tailles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses mâchoires et de ses dents de grandes tailles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dents",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mâchoires et dents de grandes tailles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mâchoires et de ses dents",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses mâchoires et ses dents de grandes tailles.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -81022,12 +81181,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -81102,33 +81255,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mâchoires et de ses dents",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mâchoires et de ses dents de grandes tailles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses mâchoires et de ses dents",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dents",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mâchoires et dents de grandes tailles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mâchoires et de ses dents",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses mâchoires et ses dents de grandes tailles.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -81174,12 +81334,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -81230,33 +81384,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sous les 25 mètres d' eau qui ont envahi la grotte depuis la déglaciation il y au moins 4000 ans",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sous les 25 mètres d' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sous les 25 mètres d' eau qui ont envahi la grotte depuis la déglaciation il y au moins 4000 ans",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sous les 25 mètres d' eau qui ont envahi la grotte",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 mètres d'eau",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.45
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sous les 25 mètres d' eau qui ont envahi la grotte depuis la déglaciation il y au moins 4000 ans",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sous les 25 mètres d'eau",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 0.8
                 }
               },
               "human_annot": {
@@ -81554,48 +81715,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -81682,33 +81801,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes).",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -81730,12 +81856,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -81804,33 +81924,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes) et religieux (pour maintenir la cohésion des groupes) et religieux (pour maintenir la",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la vie sociale par des systèmes militaires (pour se défendre des concurrents) et religieux (pour maintenir la cohésion des groupes)",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaires et religieux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la vie sociale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -81858,12 +81985,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -81932,33 +82053,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' explication de l' évolution rapide des hominines à l' époque des glaciations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'explication de l'évolution rapide des hominines à l'époque des glaciations.",
-                  "rougeL": 0.608695652173913
+                  "rougeL": 0.608695652173913,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -81998,12 +82126,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -82061,42 +82183,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sibérie occidentale",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "près du village d' Ust' - Ishim",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -82186,33 +82309,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sibérie occidentale",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -82240,12 +82370,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -82335,33 +82459,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "carcasses de gnous et d' autres grands mammifères pour les découper et les manger",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "carcasses de gnous et d' autres grands mammifères",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des carcasses de gnous et d' autres grands mammifères",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gnous et d' autres grands mammifères",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des carcasses de gnous et d' autres grands mammifères",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "carcasses de gnous et d' autres grands mammifères",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les carcasses de gnous et d'autres grands mammifères pour les découper et les manger.",
-                  "rougeL": 0.7826086956521738
+                  "rougeL": 0.7826086956521738,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -82377,12 +82508,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -82463,33 +82588,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cavalerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "équipe d' archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' équipe d' archéologues qui fouille le mont Castel",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' équipe d' archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une équipe d' archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' équipe d' archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'équipe d'archéologues",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -82511,12 +82643,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -82591,33 +82717,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les aborigènes et une autre fois par les prédécesseurs des Vikings",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les aborigènes et une autre fois par les prédécesseurs des Vikings",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les aborigènes et une autre fois par les prédécesseurs des Vikings",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aborigènes et une autre fois par les prédécesseurs des Vikings",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "aborigènes et les prédécesseurs des Vikings",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les aborigènes et les prédécesseurs des Vikings",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les aborigènes et les prédécesseurs des Vikings",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -82663,12 +82796,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -82719,33 +82846,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "parmi deux possibles",
-                  "rougeL": 0.20689655172413793
+                  "rougeL": 0.20689655172413793,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "soit une femme vue de face, les bras au-dessus de la tête, associée à un addax (une grosse antilope du désert), soit un arbre associé à une peau animale tendue sur des bâtons croisés",
-                  "rougeL": 0.9387755102040816
+                  "rougeL": 0.9387755102040816,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "parmi deux possibles",
-                  "rougeL": 0.20689655172413793
+                  "rougeL": 0.20689655172413793,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "soit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sous forme de femme vue de face et d'addax",
-                  "rougeL": 0.17142857142857146
+                  "rougeL": 0.17142857142857146,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "autour d' un couple d' éléments, choisi parmi deux possibles",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "choisi parmi",
-                  "rougeL": 0.07142857142857144
+                  "rougeL": 0.07142857142857144,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -82773,12 +82907,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -82841,33 +82969,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux",
-                  "rougeL": 0.07407407407407407
+                  "rougeL": 0.07407407407407407,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux",
-                  "rougeL": 0.07407407407407407
+                  "rougeL": 0.07407407407407407,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "soit une femme vue de face, les bras au-dessus de la tête, associée à un addax",
-                  "rougeL": 0.5945945945945945
+                  "rougeL": 0.5945945945945945,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "soit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une femme vue de face, les bras au-dessus de la tête, associée à un addax (une grosse antilope du désert)",
-                  "rougeL": 0.7317073170731707
+                  "rougeL": 0.7317073170731707,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une femme vue de face, les bras au-dessus de la tête, associée à un addax (une grosse antilope du désert), soit un arbre associé à une peau animale tendue sur des bâtons croisés",
-                  "rougeL": 0.9387755102040816
+                  "rougeL": 0.9387755102040816,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Parmi deux possibles.",
-                  "rougeL": 0.20689655172413793
+                  "rougeL": 0.20689655172413793,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -82895,12 +83030,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -82969,33 +83098,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Alexandre Sévère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Maximin Ier le Thrace",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' empereur Maximin Ier le Thrace",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Alexandre Sévère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une colonne romaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83011,12 +83147,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -83088,33 +83218,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent, plus nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent, plus nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Des carottes extraites des calottes glaciaires et des boues du fond océanique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "instables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "nous réalisons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83136,12 +83273,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -83216,33 +83347,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent, plus nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent, plus nous réalisons à quel point les anciens environnements de nos ancêtres ont dû être instables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plus nos connaissances sur ces oscillations climatiques progressent",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des carottes extraites des calottes glaciaires et des boues du fond océanique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "nous réalisons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83264,12 +83402,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -83344,33 +83476,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "aux archives",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83398,12 +83537,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -83472,33 +83605,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une scène de chasse au taureau datant du VIe millénaire avant notre ère",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une scène de chasse au taureau datant du VIe millénaire avant notre ère",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "scène de chasse au taureau",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "scène de chasse au taureau",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une scène de chasse au taureau",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "scène de chasse au taureau",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une scène de chasse au taureau.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -83532,12 +83672,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -83594,33 +83728,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "scène de chasse au taureau datant du VIe millénaire avant notre ère",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chasse au taureau",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chasse au taureau",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chasse au taureau",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "chasse au taureau",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "scène de chasse au taureau",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une scène de chasse au taureau.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83666,12 +83807,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -83722,33 +83857,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "si les ossements sont séparés en deux groupes, c' est parce que la brèche qui les contenait tous initialement (résultant de l' éboulis sur lequel est mort Little Foot) s' est effondrée,",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "si les ossements sont séparés en deux groupes, c' est parce que la brèche qui les contenait tous initialement (résultant de l' éboulis sur lequel est mort Little Foot) s' est eff",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la brèche qui les contenait tous initialement (résultant de l' éboulis sur lequel est mort Little Foot) s' est effondrée",
-                  "rougeL": 0.30769230769230765
+                  "rougeL": 0.30769230769230765,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ils comprennent que si les ossements sont séparés en deux groupes, c' est parce que la brèche qui les contenait tous initialement",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "qu'ils sont séparés en deux groupes",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "séparés en deux groupes",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les chercheurs apprennent que les ossements sont séparés en deux groupes",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83764,12 +83906,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -83851,33 +83987,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "construction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "construction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "renforts métalliques de la cathédrale de Beauvais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "début de la construction",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "construction de la cathédrale de Beauvais",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' usage de tirants et de chaînages en acier",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la construction de la cathédrale de Beauvais",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -83899,12 +84042,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -83991,33 +84128,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ian Hodder, de l' Université Stanford",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ian Hodder",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84051,12 +84195,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -84131,33 +84269,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pastoralisme et de l' agriculture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "laitière",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "culture des vases à entonnoir",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'industrie laitière",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84173,12 +84318,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -84265,33 +84404,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pastoralisme et de l' agriculture sous la forme de la culture des vases à entonnoir",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "culture des vases à entonnoir",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'industrie laitière",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' industrie laitière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'industrie laitière",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84307,12 +84453,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -84393,33 +84533,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84453,12 +84600,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -84515,33 +84656,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84551,12 +84699,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -84643,33 +84785,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la plus vieille œuvre figurative de la grotte Chauvet – un rhinocéros – date de 35000 à 38000 ans",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -84841,48 +84990,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -85089,33 +85196,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "35000 à 38000 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots dans l'article qui répond à la question \"De quand date la plus vieille oeuvre figurative de la grotte Chauvet ?\" est \"de 35000 à 38000 ans\".",
-                  "rougeL": 0.37037037037037035
+                  "rougeL": 0.37037037037037035,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -85143,12 +85257,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -85217,33 +85325,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un chef de milice gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -85253,12 +85368,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -85339,33 +85448,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -85399,12 +85515,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -85467,33 +85577,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le plafond d' une salle souterraine s' écroule en partie et crée un puits",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un puits",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Un puits.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -85503,12 +85620,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -85597,33 +85708,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une sépulture près de la tombe de Sobekhotep Ier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -85663,12 +85781,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -85725,33 +85837,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Un scribe qui ne sait pas le sumérien, mais quel scribe est -ce là?",
-                  "rougeL": 0.4285714285714285
+                  "rougeL": 0.4285714285714285,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sumérien",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mésopotamienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sumérien",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sumer",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sumer",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Un proverbe sumérien.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -85761,12 +85880,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -85853,33 +85966,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -85913,12 +86033,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -85975,33 +86089,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au-delà",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' au-delà",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers l' au-delà",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers l'au-delà",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86035,12 +86156,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -86103,33 +86218,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits qui n' étaient pas les siens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les petits qui n' étaient pas les siens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits qui n' étaient pas les siens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les petits",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les petits qui n' étaient pas les siens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les petits qui n' étaient pas les siens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les petits",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -86163,12 +86285,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -86231,33 +86347,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lors de fouilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2007.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en 2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86297,12 +86420,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -86353,33 +86470,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2007.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2007",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "2007.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86413,12 +86537,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -86481,33 +86599,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un chef de milice gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -86535,12 +86660,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -86609,42 +86728,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "telle un chaudron débordant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chaudron débordant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Afrique a émis des vagues successives d' homonidés.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chaudron débordant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la planète",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour s'installer sur la planète.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -86749,33 +86869,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86815,12 +86942,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -86883,33 +87004,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le fromage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -86919,12 +87047,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -87011,33 +87133,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "3000 ans environ",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "3000 ans environ",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "3000 ans",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "3000 ans",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "3000 ans environ",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "3000 ans environ",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "depuis 3000 ans environ.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87053,12 +87182,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -87139,33 +87262,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "équipe de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Une équipe de l'INRAP.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87205,12 +87335,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -87262,33 +87386,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "équipe de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "INRAP",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une équipe de l' INRAP",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87298,12 +87429,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -87391,33 +87516,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "49 espèces de petite taille",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87457,12 +87589,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -87513,33 +87639,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "49 espèces de petite taille",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "29 espèces de carnivores de grande taille et 49 espèces de petite taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87573,12 +87706,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -87653,33 +87780,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ossements néandertaliens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "gènes néandertaliens",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les gènes des Homo sapiens favorisant la fertilité",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les gènes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87719,12 +87853,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -87781,33 +87909,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'eau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'eau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87847,12 +87982,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -87903,33 +88032,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'eau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' eau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'eau par évaporation",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -87963,12 +88099,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -88031,33 +88161,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "nourriture et d' autres soins",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trop d' énergie et de nourriture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'énergie et de la nourriture",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88085,12 +88222,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -88159,33 +88290,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "utilisé des images satellitaires",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Berhane Asfaw",
-                  "rougeL": 0.16666666666666666
+                  "rougeL": 0.16666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Berhane Asfaw, du Service de recherche de la Vallée du Rift à Addis-Abeba",
-                  "rougeL": 0.0909090909090909
+                  "rougeL": 0.0909090909090909,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "utilisé des images satellitaires",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les collègues ont utilisé des images satellitaires.",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88201,12 +88339,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -88282,33 +88414,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des images satellitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88348,12 +88487,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -88411,33 +88544,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2000 objets",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88471,12 +88611,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -88533,33 +88667,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "archéologiquement stérile",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "stérile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "2 mètres de côté",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un niveau archéologiquement stérile",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "un niveau archéologiquement stérile.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -88587,12 +88728,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -88661,33 +88796,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Min Zhu",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les collègues chinois et suédois travaillent sur la découverte et l'étude d'un fossile de poisson nommé Entelognathus.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88709,12 +88851,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -88785,33 +88921,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Entelognathus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "poisson à plaques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poisson à plaques, mais doté d' une mâchoire osseuse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "phylogénie des poissons",
-                  "rougeL": 0.1818181818181818
+                  "rougeL": 0.1818181818181818,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -88827,12 +88970,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -88927,33 +89064,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les chasseurs - cueilleurs Hadza, en Tanzanie, et par les San, au Botswana",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les San",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cueilleurs Hadza, en Tanzanie, et par les San, au Botswana",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les San",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les chasseurs - cueilleurs Hadza, en Tanzanie, et par les San, au Botswana",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hadza, en Tanzanie, et par les San, au Botswana",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les chasseurs - cueilleurs Hadza, en Tanzanie, et par les San, au Botswana,",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -88999,12 +89143,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -89055,33 +89193,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chasses bien plus anciennes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' existence de chasses bien plus anciennes",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -89121,12 +89266,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -89177,33 +89316,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "existence de chasses bien plus anciennes",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "existence de chasses bien plus anciennes",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "existence de chasses bien plus anciennes",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' existence de chasses bien plus anciennes.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'existence de chasses bien plus anciennes",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' existence de chasses bien plus anciennes",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'existence de chasses bien plus anciennes.",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -89237,12 +89383,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -89305,33 +89445,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "GPT-3.5": {
                   "answer_pred": "par évaporation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 }
               },
               "human_annot": {
@@ -89545,48 +89692,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -89758,33 +89863,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "presque 16 mètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "presque 16 mètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -89794,12 +89906,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -89881,33 +89987,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "presque 16 mètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "presque 16 mètres de large",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -89941,12 +90054,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -90010,33 +90117,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la forme symbolique d' une croix)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le transept donne au bâtiment la forme symbolique d'une croix",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -90070,12 +90184,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -90132,33 +90240,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "donne au bâtiment la forme symbolique d' une croix)",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la forme symbolique d' une croix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le transept donne au bâtiment la forme symbolique d'une croix",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -90192,12 +90307,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -90260,33 +90369,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dents servent d' armes dans leurs combats pour la reproduction",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ces dents servent d' armes dans leurs combats pour la reproduction.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour se reproduire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "combats pour la reproduction",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -90320,12 +90436,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -90382,33 +90492,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "combats pour la reproduction",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour la reproduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "combats pour la reproduction",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans leurs combats pour la reproduction.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -90454,12 +90571,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -90510,33 +90621,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les archéologues.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -90576,12 +90694,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -90635,33 +90747,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "50 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "50 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "environ 50 mètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -90689,12 +90808,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -90757,33 +90870,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "150 mètres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "50 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "50 mètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "environ 50 mètres or 150 mètres",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "150 mètres.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -90829,12 +90949,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -90885,33 +90999,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fouilleurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fouilleurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fouilleurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fossiles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des fouilleurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les fouilleurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on a par exemple trouvé",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -90933,12 +91054,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -91014,42 +91129,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "que le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "que le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme,",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'arrivée de gènes mal tolérés dans le patrimoine génétique de l'homme",
-                  "rougeL": 0.36363636363636365
+                  "rougeL": 0.36363636363636365,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme, de sorte que la sélection naturelle les a éliminés de régions entières du génome",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "suggère que le métissage entre Homo sapiens et Néandertaliens a provoqué l'arrivée de gènes mal tolérés dans le patrimoine génétique de l'homme, de sorte que la sélection naturelle les a éliminés de régions entières du génome.",
-                  "rougeL": 0.7241379310344828
+                  "rougeL": 0.7241379310344828,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -91136,33 +91252,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.75
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.75
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.75
                 },
                 "Camembert_baseline": {
                   "answer_pred": "métissage entre Homo sapiens et Néandertaliens a provoqué l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme,",
-                  "rougeL": 0.9777777777777777
+                  "rougeL": 0.9777777777777777,
+                  "HScore": 0.75
                 },
                 "llama-2_lora": {
                   "answer_pred": "ils suggèrent que le métissage entre Homo sapiens et Néandertaliens a provoqué l'arrivée de gènes mal tolérés dans le patrimoine génétique de l'homme, de sorte que la sélection naturelle les a éliminés de régions entières du génome.",
-                  "rougeL": 0.689655172413793
+                  "rougeL": 0.689655172413793,
+                  "HScore": 0.95
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' arrivée de gènes mal tolérés dans le patrimoine génétique de l' homme, de sorte que la sélection naturelle les a éliminés de régions entières du génome",
-                  "rougeL": 0.3902439024390244
+                  "rougeL": 0.3902439024390244,
+                  "HScore": 0.8
                 },
                 "GPT-3.5": {
                   "answer_pred": "les déserts d'hérédité néandertalienne suggèrent que le métissage entre Homo sapiens et Néandertaliens a provoqué l'arrivée de gènes mal tolérés dans le patrimoine génétique de l'homme, de sorte que la sélection naturelle les a éliminés de régions entières du génome.",
-                  "rougeL": 0.5797101449275363
+                  "rougeL": 0.5797101449275363,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -91460,48 +91583,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -91588,33 +91669,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des résidus laitiers dans plus de 2200 tessons de poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "résidus laitiers",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -91654,12 +91742,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -91716,33 +91798,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les femmes et les vieillards",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les vieillards et les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -91788,12 +91877,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -91846,33 +91929,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois squelettes partiels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dinosaure qui y ressemble, sur la base de trois squelettes partiels mis au jour",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un dinosaure qui y ressemble, sur la base de trois squelettes partiels mis au jour",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trois squelettes partiels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un poulet géant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un dinosaure qui ressemble à un poulet géant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dinosaure",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -91918,12 +92008,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -91970,33 +92054,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois squelettes partiels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Anzu wyliei",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un dinosaure qui y ressemble, sur la base de trois squelettes partiels mis au jour dans le Dakota",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un dinosaure omnivore ressemblant à un poulet géant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un poulet géant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un dinosaure qui ressemble à un poulet géant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dinosaure qui y ressemble",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92012,12 +92103,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -92100,33 +92185,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour peupler l' Asie du Sud-Est et l' Europe.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour peupler l'Asie du Sud-Est et l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour peupler l'Asie du Sud-Est et l'Europe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -92136,12 +92228,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -92228,33 +92314,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un chef de milice gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92294,12 +92387,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -92350,33 +92437,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de milice gauloise de la cité des Médiomatriques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un chef de milice gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chef de milice gauloise",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le chef de milice gauloise des Médiomatriques",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92410,12 +92504,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -92483,33 +92571,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92543,12 +92638,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -92624,33 +92713,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Robert Debré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92684,12 +92780,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -92752,33 +92842,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92794,12 +92891,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -92874,33 +92965,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'Institut photovoltaïque d'Île de France.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -92910,12 +93008,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -93002,33 +93094,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "médicaments, résidus de l' agriculture ou de l' industrie chimique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "molécules, médicaments, résidus de l' agriculture ou de l' industrie chimique",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -93044,12 +93143,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -93131,33 +93224,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "un rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -93203,12 +93303,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -93253,33 +93347,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intenses",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lasers intenses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un rayon laser hyperpuissant",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -93289,12 +93390,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -93381,33 +93476,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "maladies génétiques qui affectent les nerfs et les muscles",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -93453,12 +93555,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -93503,33 +93599,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -93563,12 +93666,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -93631,33 +93728,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matière noire » et « énergie sombre »",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la quantité de matière noire et énergie sombre",
-                  "rougeL": 0.5714285714285714
+                  "rougeL": 0.5714285714285714,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matière noire et énergie sombre",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -93703,12 +93807,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -93771,42 +93869,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les fonctions officielles",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "globules et des plaquettes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -93905,33 +94004,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "professeur de médecine, chercheur, spécialiste des virus, directeur de l' institut national de la transfusion sanguine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "professeur de médecine, chercheur, spécialiste des virus, directeur de l' institut national de la transfusion sanguine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "très officielles fonctions",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "détective littéraire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "très officielles fonctions",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses très officielles fonctions",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -93953,12 +94059,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -94030,33 +94130,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut participe activement aux travaux du GIEC.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94084,12 +94191,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -94152,33 +94253,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Académie des Sciences",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut participe activement aux travaux du GIEC.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94212,12 +94320,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -94280,33 +94382,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aller de spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la cosmologie théorique dut aller de spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "spéculations hasardeuses en observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "observations plus ou moins fiables",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -94352,12 +94461,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -94405,33 +94508,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "théorique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "théorique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la cosmologie théorique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la cosmologie théorique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la cosmologie théorique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cosmologie théorique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la cosmologie théorique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94465,12 +94575,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -94536,33 +94640,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des plats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94578,12 +94689,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -94664,33 +94769,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "son laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94706,12 +94818,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -94793,33 +94899,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "neuronaux de la locomotion",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "neuronaux de la locomotion",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "neuronaux de la locomotion",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "certains mécanismes neuronaux de la locomotion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "neuronaux de la locomotion",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mécanismes neuronaux de la locomotion",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "certains mécanismes neuronaux de la locomotion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94841,12 +94954,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -94921,33 +95028,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quantité de matière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matière noire » et « énergie sombre »",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "quantité de matière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Univers",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il contient de la matière.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -94975,12 +95089,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -95049,33 +95157,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les ouvriers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mines d' or",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mines d' or",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ouvriers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans le désert Égyptien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les ouvriers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les ouvriers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95091,12 +95206,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -95177,33 +95286,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95237,12 +95353,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -95300,33 +95410,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95360,12 +95477,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -95423,33 +95534,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95483,12 +95601,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -95552,33 +95664,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nous avons soulevé un couvercle de pierre et nous avons été éblouis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Christiane Ziegler dit alors : nous avons soulevé un couvercle de pierre et nous avons été éblouis.",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95606,12 +95725,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -95692,33 +95805,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean-François Bach,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-François Bach",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -95752,12 +95872,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -95817,33 +95931,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un grand bouleversement est en train de menacer tous les équilibres",
-                  "rougeL": 0.9
+                  "rougeL": 0.9,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bouleversement est en train de menacer tous les équilibres",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un grand bouleversement est en train de menacer tous les équilibres : le réchauffement climatique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un grand bouleversement est en train de menacer tous les équilibres : le réchauffement climatique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le réchauffement climatique",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "réchauffement climatique",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un grand bouleversement est en train de menacer tous les équilibres",
-                  "rougeL": 0.9
+                  "rougeL": 0.9,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -95877,12 +95998,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -95963,33 +96078,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Olivier Torrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96023,12 +96145,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -96103,33 +96219,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "LHC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "anneau de plus de 26 km de circonférence",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le LHC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "LHC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "au sein du grand anneau de plus de 26 km de circonférence",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans le LHC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le LHC, ce grand anneau de plus de 26 km de circonférence",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -96169,12 +96292,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -96237,33 +96354,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les uns contre les autres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les uns contre les autres à une vitesse proche de la lumière",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le LHC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les uns contre les autres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à une vitesse proche de la lumière",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un grand anneau de plus de 26 km de circonférence où sont envoyés des paquets de protons les uns contre les autres à une vitesse proche de la lumière",
-                  "rougeL": 0.1904761904761905
+                  "rougeL": 0.1904761904761905,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les uns contre les autres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -96297,12 +96421,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -96383,33 +96501,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96443,12 +96568,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -96511,33 +96630,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le comité mondial de la recherche spatiale",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Cospar organise les travaux en relation avec l'exploration spatiale.",
-                  "rougeL": 0.46153846153846156
+                  "rougeL": 0.46153846153846156,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96577,12 +96703,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -96633,33 +96753,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en relation avec l' exploration spatiale",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "exploration spatiale",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les travaux en relation avec l' exploration spatiale.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'exploration spatiale",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "travaux en relation avec l' exploration spatiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les travaux en relation avec l'exploration spatiale.",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96675,12 +96802,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -96761,33 +96882,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "10 ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "10 ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 ans avant",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96815,12 +96943,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -96907,33 +97029,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jeune physicien de l' atmosphère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jeune physicien de l' atmosphère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jeune physicien de l' atmosphère,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jeune physicien de l' atmosphère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jeune physicien de l' atmosphère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -96973,12 +97102,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -97035,33 +97158,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le temps d' écrire sous un nom d' emprunt dans des revues de poésie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "revues de poésie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le temps d' écrire sous un nom d' emprunt dans des revues de poésie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le temps d' écrire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "temps d' écrire sous un nom d' emprunt dans des revues de poésie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le temps d' écrire sous un nom d' emprunt dans des revues de poésie",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le temps",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -97077,12 +97207,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -97163,33 +97287,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Arabie Saoudite",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -97199,12 +97330,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -97287,33 +97412,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en Arabie Saoudite",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des malades",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -97323,12 +97455,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -97417,33 +97543,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 20 centimètres",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "près de 20 centimètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "près de 20 centimètres.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -97453,12 +97586,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -97539,42 +97666,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20 centimètres",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 20 centimètres",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "près de 20 centimètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "mesurent pour certains près de 20 centimètres",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -97679,33 +97807,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' homme de Néandertal,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'homme de Néandertal",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -97739,12 +97874,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -97813,33 +97942,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Néandertal",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' homme de Néandertal,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme de Néandertal",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'homme de Néandertal",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -97861,12 +97997,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -97941,33 +98071,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le prestigieux Institut Allemand Max Planck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le prestigieux Institut Allemand Max Planck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Institut Allemand Max Planck",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98001,12 +98138,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -98064,33 +98195,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Max Planck",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Institut Allemand Max Planck",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Institut Allemand Max Planck",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98118,12 +98256,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -98193,33 +98325,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "salariés",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98247,12 +98386,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -98315,33 +98448,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "stress, la qualité du sommeil, la relation entre la santé du dirigeant et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "stress, qualité du sommeil, relation entre la santé du patron et celle de son entreprise, relation entre la santé du patron et celle de ses salariés.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La santé du dirigeant : de la souffrance patronale à l' entrepreneuriat salutaire",
-                  "rougeL": 0.09090909090909091
+                  "rougeL": 0.09090909090909091,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la santé du dirigeant",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -98357,12 +98497,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -98461,33 +98595,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean-Louis Fellous,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "devenir guitariste professionnel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jeune physicien de l' atmosphère",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean-Louis Fellous",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98533,12 +98674,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -98589,33 +98724,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dioxyde de carbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du dioxyde de carbone.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98649,12 +98791,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -98743,33 +98879,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À l'aide d'ordinateurs superpuissants",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98797,12 +98940,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -98895,33 +99032,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean Lilensten",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -98955,12 +99099,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -99024,33 +99162,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "biodiversité",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "essences, les sols et toute la biodiversité",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "biodiversité",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "biodiversité",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "essences, sols et toute la biodiversité",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "essences, sols et toute la biodiversité",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les essences, les sols et toute la biodiversité",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99066,12 +99211,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -99170,33 +99309,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Denis le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Denis le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Denis le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Denis Le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Denis le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Denis Le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Denis Le Bihan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99212,12 +99358,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -99299,33 +99439,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99353,12 +99500,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -99421,33 +99562,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99493,12 +99641,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -99549,33 +99691,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mieux anticiper l' avenir",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mieux anticiper l' avenir",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "afin de mieux anticiper l' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mieux anticiper l' avenir.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour mieux anticiper l' avenir",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "afin de mieux anticiper l' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "afin de mieux anticiper l'avenir",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99615,12 +99764,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -99672,33 +99815,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mieux anticiper l' avenir",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mieux anticiper l' avenir",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "meilleur anticiper l' avenir",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mieux anticiper l' avenir.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "anticiper l' avenir",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "afin de mieux anticiper l' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "afin de mieux anticiper l'avenir",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99738,12 +99888,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -99801,33 +99945,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "astrophysique",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "faire de l' astrophysique",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de l' astrophysique",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "faire de l' astrophysique",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "astronomie.",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "astrophysique",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Daniel Kunth a choisi de faire de l'astrophysique.",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -99855,12 +100006,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -99929,33 +100074,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -100001,12 +100153,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -100051,33 +100197,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quatre-vingt",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "quatre-vingt",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -100111,12 +100264,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -100179,33 +100326,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Serge Volkoff",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -100239,12 +100393,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -100307,33 +100455,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une évidence pour la communauté scientifique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une évidence pour la communauté scientifique.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -100367,12 +100522,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -100435,33 +100584,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "100 % carbone, les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "100 % carbone, les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -100501,12 +100657,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -100563,33 +100713,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "biologistes, des physiciens, des mathématiciens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des biologistes, des physiciens, des mathématiciens.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -100605,12 +100762,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -100685,33 +100836,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "biologistes, des physiciens, des mathématiciens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "biologistes, des physiciens, des mathématiciens.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une équipe de travail inattendue",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -100751,12 +100909,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -100813,33 +100965,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tout ce que nous mangeons, il le décortique",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pâtés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pâtés ou le goût de la fraise des bois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tout ce que nous mangeons, il le décortique. La cuisson des pâtés ou le goût de la fraise des bois,",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tout ce que nous mangeons, il le décortique.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tout ce que nous mangeons",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ce que nous mangeons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -100879,12 +101038,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -100942,33 +101095,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5625
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6875
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5625
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5625
                 },
                 "llama-2_lora": {
                   "answer_pred": "matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5625
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5625
                 },
                 "GPT-3.5": {
                   "answer_pred": "la matière noire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6875
                 }
               },
               "human_annot": {
@@ -101074,40 +101234,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -101295,12 +101421,6 @@
                     "annot": "annot_10",
                     "rating": "Correct"
                   }
-                ],
-                "question": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Question douteuse"
-                  }
                 ]
               },
               "lu_in_question": true,
@@ -101346,33 +101466,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "100 % carbone, les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' avenir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "100 % carbone, les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nanotubes et le graphène",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -101406,12 +101533,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -101474,33 +101595,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans des mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans des mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans des mines de crayons ?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans des mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans des mines de crayons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -101522,12 +101650,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -101602,33 +101724,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 demi- millimètre",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1 demi- millimètre de diamètre à peine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 demi- millimètre",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1 demi-millimètre de diamètre à peine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -101638,12 +101767,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -101725,33 +101848,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 demi- millimètre",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1 demi- millimètre de diamètre à peine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 demi- millimètre",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 demi- millimètre de diamètre",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1 demi-millimètre",
-                  "rougeL": 0.4000000000000001
+                  "rougeL": 0.4000000000000001,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -101779,12 +101909,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -101866,33 +101990,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les dinosaures",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -101914,12 +102045,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -102000,33 +102125,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Variraptor, triceratops,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops, ces animaux étranges ont existé il y a plus de 65 millions d' années, et ils font partie du quotidien d' Éric Buffetaut, directeur de recherches au CNRS.",
-                  "rougeL": 0.1142857142857143
+                  "rougeL": 0.1142857142857143,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Diplodocus, Variraptor, triceratops",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les dinosaures",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -102048,12 +102180,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -102140,33 +102266,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Serge Haroche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102200,12 +102333,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -102268,33 +102395,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102328,12 +102462,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -102390,33 +102518,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nos songes sans les interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le cerveau",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des rêveurs qui mangent n' importe quoi en dormant, des aveugles qui voient pendant leurs songes, des sourds qui entendent et des paraplégiques qui courent des",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "nos songes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les songes sans les interpréter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nos songes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les songes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102438,12 +102573,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -102530,33 +102659,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Didier Raoult.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102590,12 +102726,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -102664,33 +102794,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Didier Raoult",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102724,12 +102861,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -102792,33 +102923,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Abel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "origines de l' homme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les origines de l' homme étaient à l' est.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les origines de l' homme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les origines de l'homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102834,12 +102972,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -102920,33 +103052,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les tombeaux sont peuplés de momies.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -102980,12 +103119,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -103043,33 +103176,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des momies",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103103,12 +103243,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -103172,33 +103306,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "65 millions d' années,",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 65 millions d' années",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 65 millions d' années",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 65 millions d' années",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103232,12 +103373,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -103294,33 +103429,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "65 millions d' années",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a plus de 65 millions d' années,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 65 millions d' années",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 65 millions d' années",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il y a plus de 65 millions d' années",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103348,12 +103490,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -103422,33 +103558,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "étudie le stress, la qualité du sommeil, la relation entre la santé du patron et celle de son entreprise, la relation entre la santé du patron et celle de ses salariés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "obtenir des résultats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "obtenir des résultats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "obtenir des résultats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à obtenir des résultats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "obtenir des résultats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à obtenir des résultats.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103470,12 +103613,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -103544,33 +103681,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "résultats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des résultats.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103580,12 +103724,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -103672,33 +103810,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Stéphanie Thiébault a été nommée il y a quelques mois directrice de l' un des 10 instituts du CNRS.",
-                  "rougeL": 0.2608695652173913
+                  "rougeL": 0.2608695652173913,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103726,12 +103871,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -103794,33 +103933,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il y a quelques mois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Stéphanie Thiébault a été nommée il y a quelques mois directrice de l' un des 10 instituts du CNRS : l' institut écologie et environnement.",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103854,12 +104000,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -103922,33 +104062,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout savoir sur l' eau du robinet",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un livre très complet « tout savoir sur l' eau du robinet »",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Il a dirigé avec Agathe Euzen un livre très complet « tout savoir sur l' eau du robinet »",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tout savoir sur l' eau du robinet",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tout savoir sur l' eau du robinet.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' émission « les savanturiers » sur France Inter",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Agathe Euzen est aux commandes d'un livre très complet \"tout savoir sur l'eau du robinet\".",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -103976,12 +104123,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -104044,33 +104185,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout savoir sur l' eau du robinet",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tout savoir sur l' eau du robinet",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un livre très complet « tout savoir sur l' eau du robinet »",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tout savoir sur l' eau du robinet",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "journaliste scientifique, animatrice de l' émission « les savanturiers » sur France Inter.",
-                  "rougeL": 0.1
+                  "rougeL": 0.1,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un livre très complet « tout savoir sur l' eau du robinet »",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Agathe Euzen dirige avec quelqu'un un livre complet tout savoir sur l'eau du robinet.",
-                  "rougeL": 0.43478260869565216
+                  "rougeL": 0.43478260869565216,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -104086,12 +104234,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -104172,33 +104314,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -104232,12 +104381,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -104297,33 +104440,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Francis Eustache",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -104357,12 +104507,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -104428,33 +104572,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le Dr Laurent Schwartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Laurent Schwartz",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dr Laurent Schwartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Laurent Schwartz",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Laurent Schwartz",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Laurent Schwartz",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Laurent Schwartz",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -104476,12 +104627,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -104557,42 +104702,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "département de médecine expérimentale de l' Université Claude Bernard",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "département de médecine expérimentale de l' Université Claude Bernard",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "département de médecine expérimentale de l' Université Claude Bernard",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Université Claude Bernard, à Lyon,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "departement de médecine expérimentale de l' Université Claude Bernard",
-                  "rougeL": 0.6666666666666667
+                  "rougeL": 0.6666666666666667,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "département de médecine expérimentale de l' Université Claude Bernard, à Lyon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le département de médecine expérimentale de l' Université Claude Bernard",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -104706,33 +104852,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -104766,12 +104919,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -104846,33 +104993,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'article ne mentionne pas explicitement quelle image l'homme de Néandertal a perdu.",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -104906,12 +105060,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -104974,42 +105122,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "neurobiologiste, philosophe, directeur de recherche émérite au CNRS",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Inter neurobiologiste, philosophe, directeur de recherche",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "neurobiologiste, philosophe, directeur de recherche",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "neurobiologiste, philosophe, directeur de recherche émérite",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "neurobiologiste, philosophe, directeur de recherche émérite au CNRS.",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "neurobiologiste",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le directeur de recherche émérite au CNRS.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -105102,33 +105251,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "objets extraterrestres, de pierres tombées du ciel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "objets extraterrestres, de pierres tombées du ciel,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "objets extraterrestres, de pierres tombées du ciel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -105138,12 +105294,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -105226,33 +105376,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "objets extraterrestres, de pierres tombées du ciel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "objets extraterrestres, de pierres tombées du ciel,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des pierres tombées du ciel",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -105286,12 +105443,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -105356,33 +105507,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hypnose, méditation, neurofeedback.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les chercheurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -105416,12 +105574,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -105503,33 +105655,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'image de brute épaisse",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -105557,12 +105716,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -105643,33 +105796,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "brute épaisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son image de brute épaisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -105685,12 +105845,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -105771,33 +105925,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quantité de matière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matière noire » et « énergie sombre »",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "matière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quantité de matière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Univers",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la quantité de matière qu'il contient.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -105837,12 +105998,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -105899,33 +106054,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les plantes, les écorces ou les feuilles pour se soigner",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les plantes, les écorces ou les feuilles pour se soigner.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les plantes, les écorces ou les feuilles pour se soigner.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -105947,12 +106109,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -106021,33 +106177,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les plantes, les écorces ou les feuilles pour se soigner",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "feuilles, écorces, plantes",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les plantes, les écorces ou les feuilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -106093,12 +106256,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -106161,33 +106318,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "environnement et l' histoire de chacun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "environnement et l' histoire de chacun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' environnement et l' histoire de chacun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' environnement et l' histoire de chacun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' environnement et l' histoire de chacun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l'environnement et l'histoire de chacun",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'environnement et l'histoire de chacun",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -106221,12 +106385,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -106290,33 +106448,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "reconstitue les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "reconstitue les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "reconstitue les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il reconstitue les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour reconstituer les climats du passé",
-                  "rougeL": 0.46153846153846156
+                  "rougeL": 0.46153846153846156,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "reconstitue les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "reconstitue les climats du passé",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -106356,12 +106521,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -106412,33 +106571,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "climats du passé pour imaginer ceux du futur",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les climats du passé pour imaginer ceux du futur",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les climats du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les climats du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "climats du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "climats du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les climats du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -106472,12 +106638,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -106540,33 +106700,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "des nécropoles qui datent des balbutiements de la culture égyptienne",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 0.9
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nécropoles qui datent des balbutiements de la culture égyptienne",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 0.9
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -106822,48 +106989,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -106992,33 +107117,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "n' interagit pas avec la matière visible",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "personne n' a pu identifier",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "personne n' a pu identifier",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "personne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "non, personne n' a pu identifier",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "personne n' a pu identifier",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "personne n'a pu identifier",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -107052,12 +107184,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -107121,33 +107247,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "défricher",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "défricher",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.55
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à défricher",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grande terre inconnue",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.25
                 },
                 "llama-2_lora": {
                   "answer_pred": "à défricher.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.6
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à peine à défricher",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.7
                 },
                 "GPT-3.5": {
                   "answer_pred": "défricher le cerveau",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.8
                 }
               },
               "human_annot": {
@@ -107445,48 +107578,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -107574,33 +107665,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "recréant de la peau à partir de cellules - souches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en recréant de la peau à partir de cellules - souches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "recréant de la peau à partir de cellules - souches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en recréant de la peau à partir de cellules - souches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "recréer de la peau à partir de cellules - souches",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "recréant de la peau à partir de cellules - souches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en recréant de la peau à partir de cellules-souches",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -107616,12 +107714,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -107710,33 +107802,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des nécropoles qui datent des balbutiements de la culture égyptienne.",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nécropoles qui datent des balbutiements de la culture égyptienne",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -107770,12 +107869,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -107838,33 +107931,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -107892,12 +107992,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -107966,33 +108060,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "balbutiements de la culture égyptienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des balbutiements de la culture égyptienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des balbutiements de la culture égyptienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des balbutiements de la culture égyptienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis plus de 3000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des balbutiements de la culture égyptienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des balbutiements de la culture égyptienne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108026,12 +108127,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -108094,33 +108189,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en savoir plus sur leurs mœurs, leur biologie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à en savoir plus sur leurs murs, leur biologie",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs murs, leur biologie",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en savoir plus sur leurs mœurs, leur biologie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chair humaine n' est pas a priori ce qu' il préfère.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en savoir plus sur leurs mœurs, leur biologie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en savoir plus",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -108136,12 +108238,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -108216,33 +108312,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "leurs mœurs, leur biologie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "leurs murs, leur biologie",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur leurs murs, leur biologie",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "leurs mœurs, leur biologie.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "biologie, mœurs, classification",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "leurs mœurs, leur biologie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en savoir plus sur leurs mœurs, leur biologie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108258,12 +108361,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -108344,33 +108441,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Quel est le nom du chercheur ?",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108416,12 +108520,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -108466,33 +108564,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "quand un requin s' attaque à un homme c' est souvent par erreur, car chair humaine n' est pas a priori ce qu' il préfère.",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un requin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108502,12 +108607,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -108594,33 +108693,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chair humaine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108666,12 +108772,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -108722,33 +108822,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "toutes ses connaissances de botaniste",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "botaniste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "botaniste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "toutes ses connaissances de botaniste",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "toutes ses connaissances de botaniste",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "toutes ses connaissances de botaniste",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ses connaissances de botaniste",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -108788,12 +108895,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -108850,42 +108951,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Si vous voulez savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On demande à Roland Lehoucq comment se battre avec des sabres laser ou si la cape d'invisibilité d'Harry Potter est une chose crédible.",
-                  "rougeL": 0.6666666666666667
+                  "rougeL": 0.6666666666666667,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -108979,33 +109081,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'étude de la grotte Chauvet",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109033,12 +109142,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -109101,33 +109204,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grotte Chauvet",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grotte Chauvet",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grotte Chauvet",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grotte Chauvet",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grotte Chauvet",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' étude de la grotte Chauvet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'étude de la grotte Chauvet",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109167,12 +109277,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -109229,33 +109333,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "association de molécules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "association de molécules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' association de molécules,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "molécules, utilisées habituellement dans le traitement d' autres maladies",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109277,12 +109388,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -109351,33 +109456,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "association de molécules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "association de molécules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "association de molécules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' association de molécules,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "molécules",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109387,12 +109499,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -109479,33 +109585,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "contre les idées reçues",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109527,12 +109640,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -109601,33 +109708,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "couleurs ternes qui oscillaient entre le vert et le marron. Ils pouvaient arborer toutes les couleurs de l' arc-en-ciel.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "non, les dinosaures n' avaient pas des couleurs ternes qui oscillaient entre le vert et le marron. Ils pouvaient arborer toutes les couleurs de l' arc-en-ciel. Non, les dinosaures, dont on connaît à ce jour environ un millier d' espèces n' étaient pas tous des géants. Il explique aussi qu' alors qu' on les croyait disparus, les dinosaures sont toujours parmi nous. Ils sont dans votre jardin, sur votre balcon, on mange leurs œufs. Sait -on que les dinosaures n' ont pas disparu ? Que l' on peut encore observer leurs descendants, les oiseaux ? Que ces volatiles disposaient d' un plumage multicolore aux motifs variés ?",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les idées reçues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109673,12 +109787,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -109729,33 +109837,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Si vous voulez savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "savoir comment se battre avec des sabres laser ou si la cape d' invisibilité d' Harry Potter est une chose crédible,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "réponse à des questions sur la vulgarisation scientifique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "demandez à Roland Lehoucq",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "comment se battre avec des sabres laser ou si la cape d'invisibilité d'Harry Potter est une chose crédible",
-                  "rougeL": 0.787878787878788
+                  "rougeL": 0.787878787878788,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -109765,12 +109880,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -109858,33 +109967,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bio actif marin",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bio actif marin",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ingéniosité des chercheurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bio actif marin",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des centaines de bio actif marin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des centaines de bio actif marin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des bio actifs marins",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -109918,12 +110034,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -109987,33 +110097,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les méduses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110023,12 +110140,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -110115,33 +110226,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110157,12 +110275,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -110237,33 +110349,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cellules",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dresseur de cellules",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110273,12 +110392,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -110365,33 +110478,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les lions y chassent le bison",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110425,12 +110545,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -110487,33 +110601,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les lions y chassent le bison",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110553,12 +110674,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -110615,33 +110730,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "piste prometteuse et audace quant à la fonction des rêves",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110669,12 +110791,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -110737,33 +110853,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la fonction des rêves",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "piste prometteuse et audacieuse quant à la fonction des rêves",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Nos songes serviraient à maintenir notre individualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "individualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Nos songes serviraient à maintenir notre individualité",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il pense avoir trouvé une piste prometteuse",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -110803,12 +110926,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -110865,33 +110982,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -110913,12 +111037,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -110987,33 +111105,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111047,12 +111172,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -111115,33 +111234,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le goût de la recherche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111169,12 +111295,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -111243,33 +111363,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dysfonctionnement d'un chromosome X pourrait être à l'origine de cancers.",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111297,12 +111424,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -111365,33 +111486,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "cancers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111431,12 +111559,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -111511,33 +111633,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Stéphane Douady",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111571,12 +111700,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -111658,33 +111781,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111718,12 +111848,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -111786,33 +111910,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Institut photovoltaïque d'Île de France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111858,12 +111989,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -111908,33 +112033,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Institut photovoltaque d' Île de France",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Institut photovoltaïque d' Île de France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Institut photovoltaïque d'Île de France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -111950,12 +112082,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -112036,33 +112162,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des manuscrits, des traités du Moyen Âge, des papyrus égyptiens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "manuscrits, des traités du Moyen ge, des papyrus égyptiens",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vieux teinturiers, derniers héritiers de traditions parfois plus que millénaire",
-                  "rougeL": 0.10526315789473685
+                  "rougeL": 0.10526315789473685,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des manuscrits, des traités du Moyen Âge, des papyrus égyptiens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des manuscrits, des traités du Moyen Âge, des papyrus égyptiens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "recettes d' autrefois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des recettes d' autrefois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -112102,12 +112235,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -112158,33 +112285,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "manuscrits, des traités du Moyen Âge, des papyrus égyptiens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "manuscrits, des traités du Moyen ge, des papyrus égyptiens",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "manuscrits, des traités du Moyen ge, des papyrus égyptiens",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "manuscrits, des traités du Moyen Âge, des papyrus égyptiens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des manuscrits, des traités du Moyen Âge, des papyrus égyptiens.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "manuscrits, des traités du Moyen Âge, des papyrus égyptiens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "manuscrits, traités du Moyen Âge, papyrus égyptiens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -112230,12 +112364,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -112286,33 +112414,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une équipe du laboratoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -112346,12 +112481,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -112415,33 +112544,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "savoir comment les conditions de travail évoluent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "savoir comment les conditions de travail évoluent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à savoir comment les conditions de travail évoluent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "savoir comment les conditions de travail évoluent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "savoir comment les conditions de travail évoluent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il cherche à savoir comment les conditions de travail évoluent",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -112487,12 +112623,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -112537,33 +112667,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comment évoluent les conditions de travail",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "comment les conditions de travail évoluent",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il voudrait savoir comment les conditions de travail évoluent.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -112597,12 +112734,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -112665,33 +112796,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "influence humaine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "influence humaine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "influence humaine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "influence humaine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hervé le Treut a pris conscience de l' influence humaine dans l' effet de serre et qu' il a décidé d' y consacrer sa vie de chercheur.",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'influence humaine",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -112731,12 +112869,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -112787,42 +112919,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "humaine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' influence humaine",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -112915,33 +113048,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Annick Loiseau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -112975,12 +113115,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -113063,33 +113197,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hervé Le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113123,12 +113264,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -113191,33 +113326,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sous le sable d' Égypte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les tombeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113257,12 +113399,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -113319,33 +113455,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113373,12 +113516,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -113447,33 +113584,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le chimiste amoureux des mots a trouvé comme titre \"les Savanturiers\".",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113507,12 +113651,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -113575,33 +113713,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il y a vingt-cinq siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il y a vingt-cinq siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vingt-cinq siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a vingt-cinq siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans le désert Égyptien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il y a vingt-cinq siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les ouvriers les ont quittées, il y a vingt-cinq siècles",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113641,12 +113786,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -113703,33 +113842,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' ordinateurs superpuissants,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ordinateurs superpuissants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À l'aide d'ordinateurs superpuissants.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113769,12 +113915,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -113831,33 +113971,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ils sont soumis, comme certaines mers, à des phénomènes de marées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "qu' ils sont soumis, comme certaines mers, à des phénomènes de marées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ils sont soumis, comme certaines mers, à des phénomènes de marées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "qu' ils sont soumis, comme certaines mers, à des phénomènes de marées.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "qu'ils sont soumis, comme certaines mers, à des phénomènes de marées.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "qu' ils sont soumis, comme certaines mers, à des phénomènes de marées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "qu'ils sont soumis, comme certaines mers, à des phénomènes de marées.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -113879,12 +114026,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -113959,33 +114100,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une première",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une première",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une première",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une première",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une première en recréant de la peau à partir de cellules - souches",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une première",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une première en recréant de la peau à partir de cellules-souches.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114013,12 +114161,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -114088,33 +114230,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114160,12 +114309,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -114210,33 +114353,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quatre-vingt",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans les années quatre-vingt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114366,48 +114516,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -114662,33 +114770,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' astronome Pierre Léna",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114716,12 +114831,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -114786,33 +114895,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pierre Léna",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114852,12 +114968,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -114916,33 +115026,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "avec les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -114982,12 +115099,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -115038,33 +115149,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "avec les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les yeux fermés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115104,12 +115222,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -115166,33 +115278,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le climat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115220,12 +115339,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -115294,33 +115407,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hervé le Treut a pris conscience de l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour prendre conscience de l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.8333333333333334
+                  "rougeL": 0.8333333333333334,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "influence humaine dans l'effet de serre",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115360,12 +115480,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -115416,42 +115530,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hervé le Treut a pris conscience de l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hervé le Treut a pris conscience de l' influence humaine dans l' effet de serre",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en 1986 qu' Hervé le Treut a pris conscience de l' influence humaine dans l' effet de serre et qu' il a décidé d' y consacrer sa vie de chercheur.",
-                  "rougeL": 0.5217391304347826
+                  "rougeL": 0.5217391304347826,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pris conscience de l' influence humaine dans l' effet de serre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut a pris conscience de l'influence humaine dans l'effet de serre et a décidé d'y consacrer sa vie de chercheur.",
-                  "rougeL": 0.30769230769230765
+                  "rougeL": 0.30769230769230765,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -115556,33 +115671,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Roland Lehoucq",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115616,12 +115738,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -115684,33 +115800,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "abandonner un rêve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "abandonner un rêve : celui de devenir guitariste professionnel",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "abandonner un rêve : celui de devenir guitariste professionnel",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' abandonner un rêve : celui de devenir guitariste professionnel.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "devenir guitariste professionnel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "d' abandonner un rêve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'abandonner un rêve : celui de devenir guitariste professionnel.",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115726,12 +115849,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -115806,42 +115923,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "abandonner un rêve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "abandonner un rêve : celui de devenir guitariste professionnel",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "abandonner un rêve : celui de devenir guitariste professionnel",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' abandonner un rêve : celui de devenir guitariste professionnel.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "abandonner un rêve",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "décide d' abandonner un rêve",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "abandonner un rêve : celui de devenir guitariste professionnel.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -115934,33 +116052,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des nécropoles qui datent des balbutiements de la culture égyptienne",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nécropoles qui datent des balbutiements de la culture égyptienne",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -115976,12 +116101,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -116056,33 +116175,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des nécropoles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -116128,12 +116254,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -116196,78 +116316,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mines d' or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mines d' or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mines d' or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mines d' or,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "des mines d'or",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des mines d' or",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les mines d'or",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -116648,42 +116733,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "programmes d' aides envers les plus démunis",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "programmes d' aides",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "programmes d' aides",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "médicaments",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des programmes d' aides",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "certains programmes d' aides",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "par des programmes d'aides",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -116776,33 +116862,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -116836,12 +116929,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -116899,33 +116986,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ronan Allain",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -116959,12 +117053,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -117028,15 +117116,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau à l' état liquide",
                   "rougeL": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau à l' état liquide",
                   "rougeL": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau à l' état liquide",
                   "rougeL": 1.0
                 },
@@ -117100,33 +117188,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en 1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117166,12 +117261,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -117223,33 +117312,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1995",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117289,12 +117385,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -117352,33 +117442,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ce titre « les Savanturiers »",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ce titre « les Savanturiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "journaliste scientifique, animatrice de l' émission « les savanturiers » sur France Inter.",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce titre « les Savanturiers »",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le chimiste amoureux des mots est à l'origine du titre \"les Savanturiers\".",
-                  "rougeL": 0.5714285714285714
+                  "rougeL": 0.5714285714285714,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117394,12 +117491,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -117474,33 +117565,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Savanturiers",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "« les Savanturiers »",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117528,12 +117626,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -117602,33 +117694,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ouganda",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117662,12 +117761,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -117731,33 +117824,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pêcheurs peu scrupuleux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "chair humaine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "un homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117797,12 +117897,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -117859,33 +117953,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -117931,12 +118032,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -117981,33 +118076,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "depuis deux ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118023,12 +118125,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -118109,33 +118205,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "émotion qu' elle n' a de cesse de vouloir partager à travers de multiples conférences",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "émotion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une émotion qu' elle n' a de cesse de vouloir partager à travers de multiples conférences",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Une émotion qu' elle n' a de cesse de vouloir partager à travers de multiples conférences,",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des conférences",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "multiples conférences",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des conférences",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -118175,12 +118278,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -118243,33 +118340,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Bretagne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118309,12 +118413,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -118373,33 +118471,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matière noire » et « énergie sombre »",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre »",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la quantité de matière noire et énergie sombre",
-                  "rougeL": 0.5714285714285714
+                  "rougeL": 0.5714285714285714,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "matière noire et énergie sombre",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les mystérieuses « matière noire » et « énergie sombre ».",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118433,12 +118538,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -118501,33 +118600,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "10 ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "10 ans",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 ans avant",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il y a 10 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118549,12 +118655,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -118647,33 +118747,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rémy Mosseri",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118707,12 +118814,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -118775,33 +118876,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une explication au phénomène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118835,12 +118943,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -118921,33 +119023,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le cœlacanthe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le clacanthe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le clacanthe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le cœlacanthe un poisson préhistorique de 400 millions d' années",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "poisson du fond des âges : le cœlacanthe",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le cœlacanthe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le cœlacanthe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -118963,12 +119072,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -119049,33 +119152,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "nerfs et muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nerfs et les muscles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119103,12 +119213,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -119177,33 +119281,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à l' ouest du grand Rift qui coupe en deux la corne de l' Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à l' ouest du grand Rift qui coupe en deux la corne de l' Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ouest du grand Rift",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à l' ouest du grand Rift qui coupe en deux la corne de l' Afrique.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à l' ouest du grand Rift",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à l' ouest du grand Rift qui coupe en deux la corne de l' Afrique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'ouest du grand Rift qui coupe en deux la corne de l'Afrique",
-                  "rougeL": 0.7
+                  "rougeL": 0.7,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119243,12 +119354,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -119299,33 +119404,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grand Rift",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grand Rift qui coupe en deux la corne de l' Afrique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grand Rift",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grand Rift qui coupe en deux la corne de l' Afrique.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "grand Rift",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grand Rift",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'ouest du grand Rift qui coupe en deux la corne de l'Afrique.",
-                  "rougeL": 0.7
+                  "rougeL": 0.7,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119353,12 +119465,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -119427,33 +119533,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "du GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut participe activement aux travaux du GIEC.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119463,12 +119576,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -119549,33 +119656,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aux travaux du GIEC, qui a rendu public la synthèse de son dernier rapport dimanche dernier",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "travaux",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux travaux du GIEC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "GIEC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "travaux",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hervé le Treut participe activement aux travaux du GIEC.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119621,12 +119735,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -119695,33 +119803,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Michel L' Hour",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "son grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "enfant, il pratiquait la pêche avec son grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son grand-père",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -119743,12 +119858,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -119824,33 +119933,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -119884,12 +120000,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -119946,33 +120056,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "conscience",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des signaux de conscience",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120018,12 +120135,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -120074,33 +120185,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120128,12 +120246,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -120196,33 +120308,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "génétiques",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les maladies génétiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120244,12 +120363,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -120324,33 +120437,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 million",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en moyenne 1 million de bulles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120378,12 +120498,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -120446,33 +120560,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1 million de bulles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un million de bulles.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120506,12 +120627,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -120574,33 +120689,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Caen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120640,12 +120762,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -120706,33 +120822,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comité mondial de la recherche spatiale, le Cospar.",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le Cospar.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120754,12 +120877,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -120828,33 +120945,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Cospar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -120894,12 +121018,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -120961,33 +121079,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Olga",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121021,12 +121146,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -121083,33 +121202,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121143,12 +121269,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -121211,33 +121331,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "collectionneur privé",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grand public",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un collectionneur privé, Jacques Doucet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un collectionneur privé, Jacques Doucet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121277,12 +121404,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -121335,33 +121456,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jacques Doucet",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121395,12 +121523,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -121477,33 +121599,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Celui-ci",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le Baiser",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les formes qu' il libère sur la toile incarnent un désir rageur et sauvage de déconstruire le corps",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "corps",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le corps",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le corps",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le corps",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121549,12 +121678,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -121602,33 +121725,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "souvent",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse et Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse et Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Apollinaire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso et Matisse",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso et Matisse",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à son rival",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -121668,12 +121798,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -121727,33 +121851,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un choix",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "amitiés les plus marquantes de Picasso",
-                  "rougeL": 0.7777777777777778
+                  "rougeL": 0.7777777777777778,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "histoire de quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Il nous fallait faire un choix",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' histoire de quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' histoire de quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il nous temp aurait fallait faire un choix",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -121763,12 +121894,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -121852,33 +121977,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rivalité amicale avec les artistes Aussitôt arrivé à Paris, Picasso fait connaissance avec la bohème artistique de l' époque",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' histoire de quelques-unes des amitiés les plus marquantes de Picasso.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' histoire de quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' histoire de quelques-unes des amitiés les plus marquantes de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'histoire de quelques-unes des amitiés les plus marquantes de Picasso.",
-                  "rougeL": 0.8695652173913043
+                  "rougeL": 0.8695652173913043,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -121912,12 +122044,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -121983,33 +122109,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gertrude Stein et son frère Léo",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Gertrude Stein et son frère Léo",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -122037,12 +122170,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -122105,33 +122232,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Gertrude Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Gertrude Stein.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -122165,12 +122299,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -122233,33 +122361,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122293,12 +122428,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -122355,33 +122484,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chantre de la « pop culture » américaine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une superstar",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122391,12 +122527,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -122501,33 +122631,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Serge de Diaghilev",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Serge de Diaghilev",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122555,12 +122692,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -122630,33 +122761,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "déclarer sa flamme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "courte phrase, « ma jolie »",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso aborde son amour pour la belle Eva.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122696,12 +122834,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -122770,33 +122902,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122830,12 +122969,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -122904,33 +123037,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -122964,12 +123104,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -123032,33 +123166,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout le monde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tout le monde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un coup d' il",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tout le monde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un coup d' œil",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tout le monde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -123104,12 +123245,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -123163,33 +123298,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -123235,48 +123377,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -123609,33 +123709,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -123675,12 +123782,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -123737,33 +123838,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa virilité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tourmente",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tourmente",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa virilité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sa virilité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sa virilité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sa virilité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -123785,12 +123893,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -123866,33 +123968,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le rideau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -123932,12 +124041,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -123994,33 +124097,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Autoportrait bleu",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -124054,12 +124164,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -124122,33 +124226,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'autoportrait bleu",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -124182,12 +124293,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -124256,33 +124361,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il soutient moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le pouvoir est menacé par l' armée nationaliste de Franco",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le pouvoir est menacé par l' armée nationaliste de Franco.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soutenir moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "soutient moralement et financièrement les républicains espagnols, dont le pouvoir est menacé par l' armée nationaliste de Franco",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pour soutenir moralement et financièrement les républicains espagnols, dont le pouvoir est menacé par l'armée nationaliste de Franco.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -124322,12 +124434,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -124402,33 +124508,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cargos Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -124462,12 +124575,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -124542,33 +124649,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cargos Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cargos Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -124596,12 +124710,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -124682,33 +124790,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le cirque Medrano",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -124742,12 +124857,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -124813,33 +124922,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -124873,12 +124989,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -124941,33 +125051,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "très tôt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "très tôt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "très tôt dans sa carrière",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "très tôt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "1905, au Salon d' automne de Paris",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "En 1905",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "très tôt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -124977,12 +125094,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -125069,33 +125180,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125135,12 +125253,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -125193,33 +125305,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso ne débarque pas tout seul du train",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ne débarque pas tout seul du train",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso ne débarque pas tout seul du train",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso ne débarque pas tout seul du train",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "il arrive à Paris en octobre 1900, Picasso ne débarque pas tout seul du train. Il est accompagné de. Cargos Casagemas, un camarade qu' il a connu deux ans plus tôt à Barcelone pendant ses études d' art.",
-                  "rougeL": 0.06451612903225806
+                  "rougeL": 0.06451612903225806,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso ne débarque pas tout seul du train",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "en train",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125247,12 +125366,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -125323,33 +125436,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125383,12 +125503,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -125452,33 +125566,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "partisan du pouvoir républicain menacé",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art.",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il accepte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l'art.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125506,12 +125627,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -125571,33 +125686,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "partisan du pouvoir républicain menacé",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art.",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art.",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "par l'art",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125613,12 +125735,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -125708,33 +125824,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125768,12 +125891,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -125839,33 +125956,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -125899,12 +126023,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -125964,33 +126082,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Claes Oldenburd",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126030,12 +126155,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -126087,33 +126206,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"L' Homme au mouton\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126147,12 +126273,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -126216,33 +126336,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1909",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À partir de 1909",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126282,12 +126409,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -126356,33 +126477,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la jeune République espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126410,12 +126538,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -126485,33 +126607,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en 1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en 1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en 1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126557,12 +126686,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -126608,33 +126731,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1915",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126662,12 +126792,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -126737,33 +126861,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126797,12 +126928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -126860,33 +126985,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -126920,12 +127052,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -126989,33 +127115,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pas moins de 800 études",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso a réalisé des études.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -127049,12 +127182,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -127111,33 +127238,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -127183,12 +127317,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -127239,33 +127367,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127293,12 +127428,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -127361,33 +127490,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le taureau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127415,12 +127551,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -127489,33 +127619,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127555,12 +127692,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -127618,33 +127749,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127654,12 +127792,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -127741,33 +127873,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des avions allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127789,12 +127928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -127870,33 +128003,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1971.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -127924,12 +128064,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -127995,33 +128129,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En 1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1971",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128061,12 +128202,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -128126,33 +128261,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son ami Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128186,12 +128328,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -128248,33 +128384,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128308,12 +128451,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -128376,33 +128513,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ennuyer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' ennuyer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' ennuyer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cette vie commence à l' ennuyer.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' ennuyer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' ennuyer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cette vie commence à l'ennuyer.",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128418,12 +128562,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -128504,33 +128642,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "métamorphose les objets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "métamorphose les objets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il métamorphose les objets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il métamorphose les objets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il détecte en eux des potentialités qui les font passer d' objets du quotidien à des objets d' art.",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il métamorphose les objets",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il métamorphose les objets.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128552,12 +128697,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -128644,33 +128783,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La jeune femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso, Fernande Olivier, Eva Gouel",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la jeune femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -128704,12 +128850,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -128778,33 +128918,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La jeune femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Fernande Olivier, Eva Gouel",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128838,12 +128985,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -128906,33 +129047,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -128966,12 +129114,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -129028,33 +129170,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur la scène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -129088,12 +129237,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -129156,33 +129299,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son ami et compatriote Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il s'est volontairement représenté sous les traits d' un homme plus âgé, les joues creusées et le regard perdu",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les joues creusées et le regard perdu",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "regard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le regard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le regard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son ami et compatriote Casagemas.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -129204,12 +129354,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -129284,33 +129428,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en détail",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8125
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en détail plus loin",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9375
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "en détail",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8125
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en détail",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8125
                 },
                 "llama-2_lora": {
                   "answer_pred": "en détail plus loin",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9375
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en détail plus loin",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9375
                 },
                 "GPT-3.5": {
                   "answer_pred": "nous étudierons en détail",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.8125
                 }
               },
               "human_annot": {
@@ -129348,40 +129499,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -129619,16 +129736,6 @@
                     "annot": "annot_10",
                     "rating": "Correct"
                   }
-                ],
-                "question": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Question douteuse"
-                  }
                 ]
               },
               "lu_in_question": true,
@@ -129686,33 +129793,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La jeune femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso la tient éloignée de son atelier durant la réalisation des « Demoiselles d' Avignon »",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso la tient éloignée de son atelier durant la réalisation des « Demoiselles d' Avignon ».",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La jeune femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -129728,12 +129842,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -129814,33 +129922,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des années 1966 - 1975",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "années 1966 - 1975",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des années 1966 - 1975",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années 1966 - 1975",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des années 1966 - 1975.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -129886,12 +130001,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -129936,33 +130045,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1966 - 1975",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "années 1966 - 1975",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des années 1966 - 1975.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -129996,12 +130112,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -130064,33 +130174,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sculptures en tôles pliées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130112,12 +130229,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -130186,33 +130297,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en tôles pliées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130252,12 +130370,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -130314,33 +130426,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sept compagnes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'arrivée ou le départ d' une femme",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Fernande Olivier",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une femme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -130362,12 +130481,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -130442,33 +130555,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130496,12 +130616,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -130588,33 +130702,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa queue-de-cheval et son long cou représentés de manière stylisée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa queue-de-cheval et son long cou",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa queue-de-cheval et son long cou",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa queue-de-cheval et son long cou",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "queue-de-cheval et son long cou",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sa queue-de-cheval et son long cou représentés de manière stylisée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sa queue-de-cheval et son long cou",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130624,12 +130745,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -130716,33 +130831,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' acide,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de l' acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de l' acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l' acide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130752,12 +130874,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -130844,33 +130960,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "allusion cachée",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "allusion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un oiseau ensanglanté",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il a recours à la métaphore, au symbole, à l' allusion cachée.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "allusion cachée",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "allusion cachée",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'allusion cachée.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -130880,12 +131003,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -130972,33 +131089,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Françoise",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Françoise se lasse cependant des humeurs de Picasso et aussi de ses infidélités.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -131026,12 +131150,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -131094,33 +131212,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Françoise",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -131160,12 +131285,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -131240,33 +131359,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lui ôte tout réalisme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cette sculpture grandeur nature n' a rien de très classique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "rêalisme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "L' Homme au mouton",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sa sculpture grandeur nature",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -131282,12 +131408,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -131368,33 +131488,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "femme au corps démesurément étiré",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la femme au corps démesurément étiré",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -131416,12 +131543,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -131491,33 +131612,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cette femme au corps démesurément étiré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la femme au corps démesurément étiré",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -131563,12 +131691,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -131620,33 +131742,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bombardement de la ville",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bombardement de la ville",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bombardement de la ville",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Guernica",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la ville par des avions allemands",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -131662,12 +131791,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -131748,33 +131871,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -131802,12 +131932,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -131870,33 +131994,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En 1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1950",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -131936,12 +132067,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -131998,33 +132123,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132064,12 +132196,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -132120,33 +132246,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mort de Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132186,12 +132319,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -132260,42 +132387,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa signature",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa signature",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "coup d' il sa signature",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa signature",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "son style",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son style ou même d' identifier d' un coup d' œil sa signature",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son style",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -132389,33 +132517,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Chicago",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132455,12 +132590,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -132548,33 +132677,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132608,12 +132744,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -132700,33 +132830,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132760,12 +132897,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -132828,33 +132959,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -132870,12 +133008,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -132950,33 +133082,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133016,12 +133155,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -133078,33 +133211,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "doubles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "doubles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "journal intime codé",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "doubles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mi - homme mi - taureau, ou l' intrépide « Mousquetaire ».",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son œuvre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les figures viennent de l'œuvre de Picasso.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133132,12 +133272,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -133219,33 +133353,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la communauté internationale indignée, qui veut faire la lumière sur le drame? Ou encore de la raison alarmée, qui veut maintenir au cœur de l' horreur les lumières de l' esprit",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La femme à la lampe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La femme à la lampe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La femme à la lampe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur le toit de l' édifice et les vêtements de la femme",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans le personnage de la femme à la lampe",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "il cache un symbole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -133261,12 +133402,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -133347,33 +133482,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133413,12 +133555,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -133469,33 +133605,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au personnage masculin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133523,12 +133666,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -133597,33 +133734,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133651,12 +133795,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -133721,33 +133859,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 21 janvier 1939",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133787,12 +133932,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -133863,33 +134002,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Malaga en Espagne",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Malaga en Espagne",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il part de l'Espagne.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -133911,12 +134057,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -133997,33 +134137,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Espagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Espagne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134063,12 +134210,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -134125,33 +134266,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134191,12 +134339,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -134247,33 +134389,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -134313,12 +134462,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -134375,33 +134518,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "patte dressée et le triangle d' un bec",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une patte dressée et le triangle d' un bec",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une patte dressée et le triangle d' un bec",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une patte dressée et le triangle d' un bec.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un triangle d' un bec",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une patte dressée et le triangle d' un bec",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une patte dressée et le triangle d'un bec",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134435,12 +134585,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -134503,33 +134647,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avec un pistolet dans un café",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "a tiré sur Germaine avec un pistolet dans un café, avant de retourner l' arme contre lui",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Casagemas a tiré sur Germaine avec un pistolet dans un café",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans un café",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "avec un pistolet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Casagemas a tiré avec un pistolet.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134539,12 +134690,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -134631,33 +134776,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les oppose : « Picasso est imprévisible et capricieux », affirmait Matisse, « Matisse peint de beaux et élégants tableaux », aurait un jour lancé Picasso",
-                  "rougeL": 0.14814814814814814
+                  "rougeL": 0.14814814814814814,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse et Picasso",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso et Matisse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ils s' envoient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -134703,12 +134855,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -134763,33 +134909,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "femme au corps démesurément étiré",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La femme au corps démesurément étiré",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La femme marchant Cette femme au corps démesurément étiré",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le corps démesurément étiré",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La femme marchant",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134805,12 +134958,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -134892,33 +135039,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sculptures en tôles pliées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -134964,12 +135118,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -135014,33 +135162,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en tôles pliées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135080,12 +135235,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -135142,15 +135291,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se servir du seul véritable moyen à sa mesure : l' art",
                   "rougeL": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "partisan du pouvoir républicain menacé",
                   "rougeL": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il va se servir du seul véritable moyen à sa mesure : l' art",
                   "rougeL": 0.0
                 },
@@ -135228,33 +135377,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Parade a mobilisé les plus grands talents de l' avant-garde du moment en danse, en théâtre, en musique et en art",
-                  "rougeL": 0.11111111111111112
+                  "rougeL": 0.11111111111111112,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Parade",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le spectacle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -135276,12 +135432,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -135381,33 +135531,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Carlota Valdivia",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135435,12 +135592,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -135528,33 +135679,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Carlota Valdivia",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso, c' est une constante, ne cherche pas à plaire. Il sent qu' il est arrivé au bout de ses recherches, qu' il doit trouver un nouveau souffle.",
-                  "rougeL": 0.2666666666666667
+                  "rougeL": 0.2666666666666667,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135582,12 +135740,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -135657,33 +135809,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Autoportrait bleu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Autoportrait bleu",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135711,12 +135870,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -135791,33 +135944,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135845,12 +136005,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -135913,33 +136067,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1973",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -135979,12 +136140,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -136041,33 +136196,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' Espagnol",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' Espagnol",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.35
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.35
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.35
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.35
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.35
                 }
               },
               "human_annot": {
@@ -136323,48 +136485,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -136493,33 +136613,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "support placé à l' arrière",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une sculpture a -t -elle besoin d' être rigide pour être une sculpture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vieille bouée dégonflée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' un support placé à l' arrière,",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un support placé à l' arrière",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un support placé à l' arrière",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'un support",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -136559,12 +136686,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -136621,33 +136742,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -136675,12 +136803,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -136743,33 +136865,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 25 mai 1937",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -136797,12 +136926,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -136871,33 +136994,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petits piques",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les petites piques",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -136925,12 +137055,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -136999,33 +137123,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Casagemas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137059,12 +137190,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -137139,33 +137264,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137199,12 +137331,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -137267,42 +137393,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "faire une surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une surface bien lisse, bien propre.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "n' a pas cherché à faire une surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une surface bien lisse, bien propre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -137394,33 +137521,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de nombreuses esquisses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso a réalisé de nombreuses esquisses pour cette toile très réfléchie.",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137448,12 +137582,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -137522,33 +137650,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour l' affiche du Congrès mondial des partisans de la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour l' affiche du Congrès mondial des partisans de la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour l' affiche du Congrès mondial des partisans de la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour l' affiche du Congrès mondial des partisans de la paix.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour l'affiche du Congrès mondial des partisans de la paix",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour l' affiche du Congrès mondial des partisans de la paix",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il l'a utilisé pour l'affiche du Congrès mondial des partisans de la paix.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137582,12 +137717,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -137650,33 +137779,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "période bleue",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "période bleue",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "période bleue",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bleu. » Ainsi débute, à l' automne 1901, la « période bleue",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "« période bleue »",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "période bleue",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la « période bleue » de Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137710,12 +137846,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -137778,33 +137908,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137838,12 +137975,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -137900,33 +138031,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -137960,12 +138098,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -138058,33 +138190,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso n' a pas vingt ans lorsqu' il quitte l' Espagne.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -138118,12 +138257,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -138186,33 +138319,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "État français",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'État",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -138240,12 +138380,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -138308,33 +138442,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' État français",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' État",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'État français",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -138356,12 +138497,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -138436,33 +138571,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sa flamme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ma jolie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso déclare sa flamme à la belle Eva.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -138472,12 +138614,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -138564,33 +138700,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sacrifice suprême, littéralement de l' holocauste",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On n'exige rien de lui dans cet extrait de l'article.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -138618,12 +138761,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -138686,33 +138823,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sacrifier son fils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de séquence de mots dans l'article qui répond à la question \"Qu'est-ce qui lui est demandé ?\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -138740,12 +138884,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -138814,33 +138952,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bombardement de la ville",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Guernica",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Guernica",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Guernica",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la ville par des avions allemands",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la ville de Guernica",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -138886,12 +139031,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -138942,33 +139081,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en mars 1938.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139008,12 +139154,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -139065,33 +139205,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en mars 1938",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139131,12 +139278,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -139194,33 +139335,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son ami Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son ami Matisse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139230,12 +139378,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -139316,33 +139458,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Matisse",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Matisse.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139382,12 +139531,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -139444,33 +139587,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la jeune République espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la jeune République espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "République espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La jeune République espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -139480,12 +139630,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -139569,33 +139713,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tableau scandaleux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le Bain turc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un tableau scandaleux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le Bain turc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "« Le Bain turc »",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le Bain turc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "« Le Bain turc »",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139617,12 +139768,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -139698,33 +139843,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les toiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les toiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les toiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les toiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des papiers collés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les toiles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un collage de surfaces colorées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -139758,12 +139910,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -139826,33 +139972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -139892,12 +140045,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -139949,33 +140096,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean Cocteau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140009,12 +140163,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -140078,33 +140226,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "800",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "800 études",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140114,12 +140269,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -140206,33 +140355,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la liaison",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La liaison est découverte.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140260,12 +140416,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -140334,33 +140484,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140394,12 +140551,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -140456,33 +140607,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140516,12 +140674,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -140584,33 +140736,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les musées nationaux n' avaient pu prêter que 8 uvres, les seules en leur possession",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les musées nationaux n' avaient pu prêter que 8 œuvres,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140650,12 +140809,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -140706,33 +140859,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les musées nationaux n' avaient pu prêter que 8 œuvres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le décès de Picasso est donc l' occasion pour la France d' acquérir les pièces majeures qui manquent encore à ses collections",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les musées nationaux n' avaient pu prêter que 8 œuvres,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les 90 ans du maître",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140778,12 +140938,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -140834,33 +140988,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -140894,12 +141055,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -140957,33 +141112,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141017,12 +141179,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -141086,33 +141242,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sculptures en tôles pliées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141152,12 +141315,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -141208,33 +141365,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en tôles pliées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de grandes sculptures en tôles pliées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141280,12 +141444,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -141348,33 +141506,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quatre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Quatre assistants aident Picasso.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141396,12 +141561,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -141477,33 +141636,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -141525,12 +141691,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -141607,33 +141767,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141667,12 +141834,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -141737,33 +141898,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141797,12 +141965,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -141859,33 +142021,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jacob",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -141919,12 +142088,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -141987,42 +142150,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "soutient moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le pouvoir est menacé par l' armée nationaliste de Franco",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le pouvoir est menacé par l' armée nationaliste de Franco.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "soutenait moralement et financièrement les républicains espagnols",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "soutient moralement et financièrement les républicains espagnols, dont le pouvoir est menacé par l' armée nationaliste de Franco",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le pouvoir est menacé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -142117,33 +142281,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le portrait cubiste",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un portrait cubiste",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso réalise le portrait cubiste de son ami.",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142189,12 +142360,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -142239,33 +142404,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le portrait cubiste de son ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142305,12 +142477,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -142367,33 +142533,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "s' installer dans un atelier de Montparnasse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "s' installer dans un atelier de Montparnasse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "s' installer dans un atelier de Montparnasse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "s' installer dans un atelier de Montparnasse,",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un atelier de Montparnasse",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de s' installer dans un atelier de Montparnasse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "s'installer dans un atelier de Montparnasse",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142439,12 +142612,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -142513,33 +142680,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142573,12 +142747,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -142653,33 +142821,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142713,12 +142888,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -142781,33 +142950,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une marque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142841,12 +143017,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -142921,33 +143091,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -142981,12 +143158,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -143055,33 +143226,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143115,12 +143293,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -143195,33 +143367,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143255,12 +143434,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -143320,33 +143493,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso réalise le portrait cubiste de son ami.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143380,12 +143560,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -143442,33 +143616,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Picasso",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Picasso réalise le portrait cubiste de son ami",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143496,12 +143677,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -143570,33 +143745,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "se convertir au catholicisme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143630,12 +143812,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -143698,33 +143874,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "neuf mois de travaux préparatoires, de croquis, d' esquisses, de tâtonnements",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "recherches picturales",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "picturales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "recherches picturales",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il dévore tout ce qu' il découvre, et il l' intègre à son art.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "croquis, esquisses, tâtonnements",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "recherches picturales",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143758,12 +143941,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -143826,33 +144003,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "toute séduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "toute séduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "toute séduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "toute séduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "séduction",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "toute séduction",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le corps de la femme nue a perdu toute séduction",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -143880,12 +144064,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -143954,33 +144132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144020,12 +144205,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -144073,33 +144252,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "16 mètres de long",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144121,12 +144307,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -144192,33 +144372,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "16 mètres",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 10 mètres de haut sur 16 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144252,12 +144439,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -144322,42 +144503,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "renversé et tué grâce à une intervention militaire athénienne",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "renversé et tué grâce à une intervention militaire athénienne",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par une intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "renversé et tué",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "tué grâce à une intervention militaire athénienne",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -144444,33 +144626,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaire athénienne",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'intervention militaire athénienne",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144504,12 +144693,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -144572,33 +144755,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "édifice avec un étage unique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "étage unique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la « haute terrasse »,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "édifice avec un étage unique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la \"haute terrasse\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144620,12 +144810,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -144696,33 +144880,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "haute terrasse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la \"haute terrasse\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144762,12 +144953,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -144826,33 +145011,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "du VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -144892,12 +145084,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -144948,33 +145134,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "VIe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "VIe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "VIe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "VIe siècle.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -145014,12 +145207,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -145076,33 +145263,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur ce site",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur ce site",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce site",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur ce site",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -145148,12 +145342,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -145200,33 +145388,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Japon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le Japon.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -145254,12 +145449,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -145330,33 +145519,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -145390,12 +145586,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -145458,33 +145648,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de la tombe de saint Lazare dans la cathédrale d' Autun, actuellement au musée Rolin",
-                  "rougeL": 0.8181818181818181
+                  "rougeL": 0.8181818181818181,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de la tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -145494,12 +145691,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -145580,33 +145771,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombe de saint Lazare dans la cathédrale d' Autun",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de la tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la tombe de saint Lazare",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -145616,12 +145814,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -145720,33 +145912,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "llama-2_lora": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -146002,48 +146201,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -146178,33 +146335,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les terrassiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -146244,12 +146408,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -146306,33 +146464,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "raid de 259/260.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "259/260",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -146360,12 +146525,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -146429,33 +146588,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "raid de 259/260.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "259/260",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le raid de 259/260",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -146501,12 +146667,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -146558,33 +146718,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "celui de droite, en entrant dans le palais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "celui de droite, en entrant dans le palais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le savant se soit décidé pour « celui de droite, en entrant dans le palais [ sic ] »",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "monolithe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le plus petit des deux monuments",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "celui de droite, en entrant dans le palais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le monument/ l'obélisque/n le plus petit des deux et le plus abîmé.",
-                  "rougeL": 0.08695652173913043
+                  "rougeL": 0.08695652173913043,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -146606,12 +146773,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -146680,33 +146841,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "celui de droite",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "celui de droite, en entrant dans le palais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lequel des deux monuments en partie recouverts de sable, devait en premier être envoyé en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lequel des deux monuments",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le plus petit des deux",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "celui de droite, en entrant dans le palais",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "« celui de droite, en entrant dans le palais »",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -146740,12 +146908,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -146820,33 +146982,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Urartéens",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Urartu",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -146880,12 +147049,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -146949,33 +147112,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1846",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147015,12 +147185,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -147071,33 +147235,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1846",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 20 novembre 1846",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147125,12 +147296,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -147199,33 +147364,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'espace occupé est rendu aux Romains et le Colisée est édifié sur l'emplacement du lac",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'espace occupé fut rendu aux Romains et le Colisée fut édifié sur l'emplacement du lac.",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147253,12 +147425,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -147321,33 +147487,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'espace occupé fut rendu aux Romains.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147369,12 +147542,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -147449,33 +147616,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le dux Ibba.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -147497,12 +147671,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -147589,33 +147757,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa taille énorme et la qualité de ses détails",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sa taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "taille énorme et la qualité de ses détails",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa taille énorme et la qualité de ses détails",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sa taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "taille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la taille énorme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -147649,12 +147824,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -147716,33 +147885,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1883",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1884",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1885",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1883, par M. Alexandre Bertrand, 1884.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1883, 1884, 1885",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1883, 1884, 1885",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1883, 1884, 1885.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147752,12 +147928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -147845,33 +148015,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Roderick Eric Davis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -147905,12 +148082,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -147986,33 +148157,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois promontoires rocheux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.7
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trois promontoires rocheux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.7
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trois promontoires rocheux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.7
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trois promontoires rocheux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.7
                 },
                 "llama-2_lora": {
                   "answer_pred": "Yenicekale, Sarıkale et Nisantepe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.7
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trois promontoires rocheux situés dans l' alignement entre la porte des Lions et de Büyükkale respectivement Yenicekale, Sarıkale et Nisantepe",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 0.9
                 },
                 "GPT-3.5": {
                   "answer_pred": "Yenicekale, Sarıkale et Nisantepe.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.7
                 }
               },
               "human_annot": {
@@ -148310,48 +148488,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -148449,33 +148585,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la maison circulaire fouillée au Nord du plateau à l' emplacement du site d' activités Unexpo",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour être présentée au public",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -148491,12 +148634,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -148571,33 +148708,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "présenté au public",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "présenter au public",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour être présenté au public",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -148613,12 +148757,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -148699,33 +148837,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour expliquer l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour expliquer l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour expliquer l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour expliquer l' introduction des poteries mycéniennes",
-                  "rougeL": 0.761904761904762
+                  "rougeL": 0.761904761904762,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour expliquer l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "expliquer l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "expliquer l'introduction des poteries mycéniennes que la deuxième invasion n'aurait pas ramenées.",
-                  "rougeL": 0.8275862068965517
+                  "rougeL": 0.8275862068965517,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -148771,12 +148916,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -148821,33 +148960,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' introduction des poteries mycéniennes que la deuxième invasion n' aurait pas ramenées",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' introduction des poteries mycéniennes",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' introduction des poteries mycéniennes",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' introduction des poteries mycéniennes",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'introduction des poteries mycéniennes que la deuxième invasion n'aurait pas ramenées",
-                  "rougeL": 0.7857142857142856
+                  "rougeL": 0.7857142857142856,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -148863,12 +149009,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -148949,33 +149089,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "basiliques byzantines, désignées selon leur emplacement dans l' enceinte de la ville comme basilique orientale, basilique occidentale et basilique occidentale, et une quatrième hors les murs, associée à la nécropole",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "basiliques byzantines",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trois basiliques byzantines, désignées selon leur emplacement dans l'enceinte de la ville comme basilique orientale, basilique centrale et basilique occidentale, et une quatrième",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "basiliques byzantines",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "basiliques byzantines",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quatre basiliques byzantines",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les basiliques byzantines",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -148997,12 +149144,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -149073,33 +149214,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "basilique orientale, basilique centrale et basilique occidentale",
-                  "rougeL": 0.1818181818181818
+                  "rougeL": 0.1818181818181818,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "basilique orientale, basilique centrale et basilique occidentale, et une quatrième hors les murs",
-                  "rougeL": 0.14285714285714285
+                  "rougeL": 0.14285714285714285,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trois basiliques byzantines, désignées selon leur emplacement dans l'enceinte de la ville comme basilique orientale, basilique centrale et basilique occidentale, et une quatrième",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "basilique orientale, basilique centrale et basilique occidentale, et une quatrième",
-                  "rougeL": 0.16666666666666666
+                  "rougeL": 0.16666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "basilique orientale, basilique centrale, basilique occidentale et une quatrième hors les murs",
-                  "rougeL": 0.14285714285714285
+                  "rougeL": 0.14285714285714285,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "basilique orientale, basilique centrale et basilique occidentale, et une quatrième hors les murs",
-                  "rougeL": 0.14285714285714285
+                  "rougeL": 0.14285714285714285,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "basilique orientale, basilique centrale et basilique occidentale",
-                  "rougeL": 0.1818181818181818
+                  "rougeL": 0.1818181818181818,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -149139,12 +149287,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -149203,33 +149345,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour les jeux jusqu' au IIIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour les jeux",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour les jeux jusqu' au IIIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "jeux",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "jeux jusqu' au IIIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour les jeux",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour les jeux",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -149251,12 +149400,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -149331,33 +149474,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films? Le public est demandeur",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Harrison m' a appelé un jour et m' a dit : Pourquoi ne pas faire un autre de ces films",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Harrison Ford, qui avait un moment hésité à faire ce quatrième volet,",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pourquoi ne pas faire un autre de ces films ?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "faire un autre de ces films",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "À propos de faire un autre de ces films",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -149403,12 +149553,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -149450,33 +149594,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films ?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films ?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pourquoi ne pas faire un autre de ces films ?",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -149486,12 +149637,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -149575,33 +149720,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au Musée Archéologique de Santander.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -149629,12 +149781,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -149697,33 +149843,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Musée Archéologique de Santander",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Musée Archéologique de Santander.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -149763,12 +149916,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -149825,33 +149972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ayahuasca qui a des effets hallucinogènes",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'ayahuasca",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -149879,12 +150033,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -149950,33 +150098,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'ayahuasca",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -150010,12 +150165,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -150081,33 +150230,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un hanchement prononcé et une grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 0.8421052631578948
+                  "rougeL": 0.8421052631578948,
+                  "HScore": 0.55
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "hanchement prononcé et une grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un hanchement prononcé et une grâce confinant à la mollesse et à l' effémination,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "un hanchement prononcé et une grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un hanchement prononcé et une grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "GPT-3.5": {
                   "answer_pred": "un hanchement prononcé et une grâce confinant à la mollesse et à l' effémination",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -150321,48 +150477,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -150537,33 +150651,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "filtre pour spéléologues",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "filtre pour spéléologues",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "filtre pour spéléologues",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comme un filtre pour spéléologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comme un filtre pour spéléologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "agit comme un filtre pour spéléologues : le passage est si étroit que seules des personnes minces peuvent s' y faufiler.",
-                  "rougeL": 0.47619047619047616
+                  "rougeL": 0.47619047619047616,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Tight Spot agit comme un filtre pour spéléologues.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -150609,12 +150730,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -150665,33 +150780,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -150821,48 +150943,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -151111,33 +151191,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de deux siècles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "depuis plus de deux siècles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -151183,12 +151270,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -151239,33 +151320,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1951",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1903",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1951",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1903",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -151281,12 +151369,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -151361,33 +151443,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1951",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1903",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1908",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -151415,12 +151504,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -151489,33 +151572,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "stèles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "stèles",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "stèle",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "400 av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La production de stèles.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -151561,12 +151651,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -151611,33 +151695,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "autour de 400 av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "400 av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la production de stèles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question \"Que commencent les Mayas ?\" est \"La production de stèles\"",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -151647,12 +151738,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -151739,33 +151824,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des influences culturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dépôts funéraires dans des tombes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des influences culturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "influences culturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des influences culturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "influences de la culture de Qijia",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'influence culturelle depuis l'intérieur de la Chine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -151805,12 +151897,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -151864,33 +151950,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les forces hollandaises ont combattu en avril 1826",
-                  "rougeL": 0.36363636363636365
+                  "rougeL": 0.36363636363636365,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -151924,12 +152017,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -151988,33 +152075,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "9 juin",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "9 juin 1826",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 9 juin 1826.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152036,12 +152130,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -152118,33 +152206,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les parties des deux autres talus",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152184,12 +152279,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -152240,33 +152329,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les deux autres talus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses parties sont devenues invisibles",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "certaines de leurs parties sont devenues invisibles",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152294,12 +152390,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -152368,33 +152458,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "artistes d' Altamira",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les artistes d'Altamira.",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152404,12 +152501,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -152496,33 +152587,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les parties des deux autres talus",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "certaines de leurs parties",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152562,12 +152660,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -152636,33 +152728,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Assyrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Urartéens",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Urartu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Urartéens.",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152672,12 +152771,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -152765,33 +152858,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "avant le XIXe siècle",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "avant la fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "avant la fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152813,12 +152913,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -152889,33 +152983,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "XIXe siècle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "XIXe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "XIXe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "XIXe siècle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "XIXe siècle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "XIXe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le XIXe siècle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -152955,12 +153056,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -153019,33 +153114,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cambridge et Venise.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153085,12 +153187,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -153142,33 +153238,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cambridge et Venise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cambridge et Venise.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153202,12 +153305,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -153271,33 +153368,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On quitte Bavay par la route de Maubeuge.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153331,12 +153435,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -153393,33 +153491,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Bavay",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153453,12 +153558,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -153521,33 +153620,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "victoire macédonienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mosaque d' Alexandre",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la mosaque d' Alexandre",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une peinture plus ancienne, de la fin du IVe siècle",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153569,12 +153675,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -153647,33 +153747,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mosaque d' Alexandre",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la mosaque d' Alexandre",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "fin du IVe siècle",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mosaïque d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153707,12 +153814,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -153779,33 +153880,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ouest et est",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vomitoires ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ouest et est",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ouest et est",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153851,12 +153959,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -153901,33 +154003,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vomitoires ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les vomitoires ouest et est",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -153949,12 +154058,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -154029,33 +154132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "stèles retrouvées à Bab El Aïn",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "stèles",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les stèles retrouvées à Bab El An",
-                  "rougeL": 0.823529411764706
+                  "rougeL": 0.823529411764706,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "stèles",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "stèles retrouvées à Bab El Aïn",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les stèles retrouvées à Bab El Aïn",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les stèles retrouvées à Bab El Aïn",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154065,12 +154175,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -154159,33 +154263,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154225,12 +154336,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -154283,33 +154388,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1929",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En décembre 1929",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154331,12 +154443,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -154413,33 +154519,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dépôt",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le trésor de Vaise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le trésor de Vaise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le trésor de Vaise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un dépôt monétaire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le trésor de Vaise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le trésor de Vaise",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154467,12 +154580,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -154543,33 +154650,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154609,12 +154723,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -154667,33 +154775,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en 1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en 1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1999",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154727,12 +154842,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -154797,33 +154906,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -154851,12 +154967,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -154919,33 +155029,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Irhuleni, roi de Hama",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Irhuleni, roi de Hama, et Hadadézer, roi de Damas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -154985,12 +155102,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -155047,33 +155158,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "stèle datée des Basses - terres mayas",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"La plus ancienne stèle\".",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -155083,12 +155201,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -155166,42 +155278,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Basses - terres mayas",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la plus ancienne stèle datée des Basses - terres mayas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la plus ancienne stèle",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -155291,33 +155404,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "communauté juive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la communauté juive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la communauté juive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juive",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "juive",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la communauté juive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la communauté juive",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -155357,12 +155477,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -155432,33 +155546,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "des collections du Musée du Louvre et des musées des Facultés de Lettres de bordeaux, Lille, Lyon et Montpellier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.1
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 },
                 "GPT-3.5": {
                   "answer_pred": "des figures de terres cuites",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -155672,48 +155793,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -155886,42 +155965,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "qu' il fut saisi d' horreur et d' inquiétude pour la conservation de la grotte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "il fut saisi d' horreur et d' inquiétude pour la conservation de la grotte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "qu' il fut saisi d' horreur et d' inquiétude pour la conservation de la grotte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "qu' il fut saisi d' horreur et d' inquiétude pour la conservation de la grotte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "que la grotte a été contaminée par le traitement",
-                  "rougeL": 0.20000000000000004
+                  "rougeL": 0.20000000000000004,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "qu' un produit toxique avait été appliqué sur la souche d' un frêne, au-dessus de la cavité ornée (sur celles de deux frênes en réalité)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "on comprend qu'il y a eu un problème avec la conservation de la grotte et que des produits toxiques ont été appliqués sur les arbres au-dessus de la cavité.",
-                  "rougeL": 0.21052631578947364
+                  "rougeL": 0.21052631578947364,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -156014,33 +156094,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156068,12 +156155,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -156136,33 +156217,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Abdülhamid II",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le sultan Abdülhamid II",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156190,12 +156278,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -156276,33 +156358,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "officier de l' armée française",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Aimé Laussedat",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156336,12 +156425,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -156406,33 +156489,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "extraction des pierres, dites « grandes oolithes blanches » des carrières de Norroy, près de Pont - à - Mousson",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "extraction des pierres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "extraction des pierres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "extraction des pierres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "extraire des pierres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "extraction des pierres, dites « grandes oolithes blanches » des carrières de Norroy, près de Pont - à - Mousson",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "extraire des pierres",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156478,12 +156568,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -156528,33 +156612,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "extraction des pierres, dites « grandes oolithes blanches » des carrières de Norroy, près de Pont - à - Mousson",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "centurions, soldats, vexillaires et auxiliaires",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "centurions, soldats, vexillaires et auxiliaires qui furent affectés à l' extraction des pierres",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "centurions, soldats, vexillaires et auxiliaires qui furent affectés à l' extraction des pierres,",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "extraction des pierres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "extraction des pierres, dites « grandes oolithes blanches » des carrières de Norroy, près de Pont - à - Mousson",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'extraction des pierres, dites « grandes oolithes blanches » des carrières de Norroy, près de Pont - à - Mousson.",
-                  "rougeL": 0.16666666666666666
+                  "rougeL": 0.16666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156570,12 +156661,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -156656,33 +156741,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "leurs frères",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les femmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156722,12 +156814,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -156800,33 +156886,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "temple d' État",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "temple d' État",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "temple d' État » et capitale",
-                  "rougeL": 0.03389830508474576
+                  "rougeL": 0.03389830508474576,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le plus grand des temples du complexe monumental d' Angkor au Cambodge.",
-                  "rougeL": 0.11594202898550722
+                  "rougeL": 0.11594202898550722,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "« temple d' État » et capitale",
-                  "rougeL": 0.06666666666666667
+                  "rougeL": 0.06666666666666667,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Angkor Vat",
-                  "rougeL": 0.19672131147540983
+                  "rougeL": 0.19672131147540983,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Suryavarman II construit Angkor Vat au début du XIIe siècle.",
-                  "rougeL": 0.19178082191780818
+                  "rougeL": 0.19178082191780818,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156842,12 +156935,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -156928,33 +157015,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les marchands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -156988,12 +157082,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -157056,33 +157144,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en cabotant ou en longeant les rives gelées reliant la Sibérie à l' Amérique.",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -157116,12 +157211,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -157179,33 +157268,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sibérie à l' Amérique",
-                  "rougeL": 0.5882352941176471
+                  "rougeL": 0.5882352941176471,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sibérie à l' Amérique",
-                  "rougeL": 0.5882352941176471
+                  "rougeL": 0.5882352941176471,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "rives gelées reliant la Sibérie à l' Amérique",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les rives reliant la Sibérie à l'Amérique",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -157233,12 +157329,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -157320,33 +157410,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Talleyrand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -157380,12 +157477,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -157448,33 +157539,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Orange",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans la ville",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Dans la ville.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -157496,12 +157594,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -157576,33 +157668,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à chauffer les maisons, d' autres pour la cuisine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sanitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "salle de réception principale portée par quatre pilastres qui ouvrait sur une grande cour autour de laquelle s' organisait l' édifice",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "installations sanitaires",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sanitaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "destinés à chauffer les maisons, d' autres pour la cuisine, ainsi que des installations sanitaires (latrines, baignoires)",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour les installations sanitaires",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -157648,12 +157747,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -157704,33 +157797,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nouvel édifice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "amphithéâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "amphithéâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "amphithéâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "amphithéâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' amphithéâtre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'édifice nouveau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -157746,12 +157846,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -157832,33 +157926,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 0.8571428571428572
+                  "rougeL": 0.8571428571428572,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Après la mort de Néron, l'espace occupé fut rendu aux Romains et le Colisée fut édifié sur l'emplacement du lac.",
-                  "rougeL": 0.6153846153846154
+                  "rougeL": 0.6153846153846154,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -157880,12 +157981,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -157954,33 +158049,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'espace occupé est rendu aux Romains et le Colisée est édifié sur l'emplacement du lac",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' espace occupé fut rendu aux Romains et le Colisée fut édifié sur l' emplacement du lac",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Après la mort de Néron, l'espace occupé fut rendu aux Romains et le Colisée fut édifié sur l'emplacement du lac.",
-                  "rougeL": 0.6153846153846154
+                  "rougeL": 0.6153846153846154,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -158014,12 +158116,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -158094,33 +158190,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Martel a identifié les interactions entre le niveau de la rivière Echo et celui de la rivière Green.",
-                  "rougeL": 0.2666666666666667
+                  "rougeL": 0.2666666666666667,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -158148,12 +158251,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -158229,33 +158326,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Martel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Martel a vérifié l'influence de la rivière à la surface avec celle à l'intérieur de la grotte.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -158289,12 +158393,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -158358,33 +158456,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Auguste",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -158424,12 +158529,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -158481,33 +158580,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de mention d'un professeur dans l'article.",
-                  "rougeL": 0.14285714285714285
+                  "rougeL": 0.14285714285714285,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -158547,12 +158653,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -158610,33 +158710,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -158670,12 +158777,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -158738,33 +158839,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "couteaux, marteaux, gouges, pinces, haches, burins, et même deux enclumes parmi les plus lourdes du monde antique (11 et 23,5 kg)",
-                  "rougeL": 0.3448275862068966
+                  "rougeL": 0.3448275862068966,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "couteaux, marteaux, gouges, pinces, haches, burins",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "couteaux, marteaux, gouges, pinces, haches, burins, et même deux enclumes",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux enclumes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "des enclumes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux enclumes parmi les plus lourdes du monde antique",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "deux enclumes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -158780,12 +158888,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -158867,33 +158969,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les villages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la population",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -158933,12 +159042,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -158989,33 +159092,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "90 %",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "90 % des personnes infectées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -159049,12 +159159,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -159129,33 +159233,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Julien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Julien qui y séjourne durant l'hiver 362/363.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -159183,12 +159294,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -159270,33 +159375,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sa tombe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombes rupestres",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tombe EK3 : Pahéri.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Sa tombe contient des textes concernant l'histoire du Nouvel Empire, des pharaons Ahmôsis Ier et Thoutmôsis III.",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -159318,12 +159430,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -159404,33 +159510,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur le versant sud de la colline qui domine la ville",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tombes rupestres",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tombe EK3 : Pahéri.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Tombe EK2 : Ahmès Pen - Nekhbet",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"Sa tombe contient des textes concernant l'histoire du Nouvel Empire, des pharaons Ahmôsis Ier et Thoutmôsis III.\"",
-                  "rougeL": 0.07142857142857142
+                  "rougeL": 0.07142857142857142,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -159470,12 +159583,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -159532,33 +159639,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "racheter l' ensemble du domaine de Mammoth Cave",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "racheter l' ensemble du domaine de Mammoth Cave, ainsi que Bishop et les autres esclaves de Franklin Gorin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de racheter l' ensemble du domaine de Mammoth Cave, ainsi que Bishop et les autres esclaves de Franklin Gorin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "racheter l' ensemble du domaine de Mammoth Cave,",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "ouvrir un hôpital pour tuberculeux dans la grotte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "racheter l' ensemble du domaine de Mammoth Cave",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "racheter l' ensemble du domaine de Mammoth Cave",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -159574,12 +159688,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -159660,33 +159768,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Al Nasir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -159720,12 +159835,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -159788,33 +159897,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les analyses chimiques de la fin du XIXe siècle",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les analyses chimiques de la fin du XIXe siècle",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "M. Daubrée, en 1881, notait : « Pour ramollir une roche aussi réfractaire que le granite, il a fallu une intention formelle, servie par des efforts habiles et prolongés … Il a fallu une surabondance, une sorte de luxe de chaleur … par suite d' un procédé ingénieux et puissant ».",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les analyses chimiques de la fin du XIXe siècle",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les analyses chimiques.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -159830,12 +159946,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -159911,42 +160021,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "analyses chimiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "M. Daubrée, en 1881, notait : « Pour ramollir une roche aussi réfractaire que le granite, il a fallu une intention formelle, servie par des efforts habiles et prolongés … Il a fallu une surabondance, une sorte de luxe de chaleur … par suite d' un procédé ingénieux et puissant ».",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les analyses chimiques de la fin du XIXe siècle permirent de comprendre",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les analyses chimiques de la fin du XIXe siècle permirent de comprendre que la soude, la potasse, le sel, l' argile",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -160040,33 +160151,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "2014",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160106,12 +160224,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -160168,33 +160280,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le général fatimide Jawhar arrive à al-Mansuriya en 969.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160222,12 +160341,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -160287,33 +160400,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en 973",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "969",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le général fatimide Jawhar atteint al-Mansuriya en 969.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160341,12 +160461,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -160406,33 +160520,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "consul",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "conservateur des antiquités grecques et romaines du British Museum",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160460,12 +160581,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -160528,33 +160643,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "conservateur des antiquités grecques et romaines du British Museum",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160594,12 +160716,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -160650,33 +160766,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "conservateur des antiquités grecques et romaines du British Museum",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "consul à Rome",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160710,12 +160833,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -160778,42 +160895,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Géants, fils de Gaïa",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Géants",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Géants, fils de Gaa",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Géants",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Géants, fils de Gaïa",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Géants, fils de Gaïa",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Géants",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -160906,33 +161024,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Marseille",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Marseille.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -160966,12 +161091,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -161037,33 +161156,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Olivier Jehasse",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Olivier Jehasse.",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -161097,12 +161223,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -161166,33 +161286,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "19",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "130",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "19",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers 130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 19.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -161238,12 +161365,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -161289,33 +161410,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En 19",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers 130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers 130 - 136",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -161361,12 +161489,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -161418,33 +161540,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aux fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à construire des routes dans l' ouest de l' Empire ottoman",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "aux fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "aux fouilles de l'Héraion en 1861, aux travaux de construction des routes dans l'ouest de l'Empire ottoman, aux fouilles à Pergame en 1864.",
-                  "rougeL": 0.23076923076923075
+                  "rougeL": 0.23076923076923075,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -161460,12 +161589,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -161540,33 +161663,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Héraion",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aux fouilles de l' Héraion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Héraion",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Héraion",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Samos, Héraion",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -161594,12 +161724,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -161668,33 +161792,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hélios, dieu du Soleil",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -161734,12 +161865,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -161790,33 +161915,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grec",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -161844,12 +161976,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -161918,15 +162044,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "On pouvait boire aussi de l'ayahuasca qui a des effets hallucinogènes en affectant le système nerveux central",
                   "rougeL": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "prêtres",
                   "rougeL": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Incas",
                   "rougeL": 0.0
                 },
@@ -161985,33 +162111,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "prêtres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Incas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "puissances surnaturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les prêtres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "on pouvait",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on pouvait boire aussi de l'ayahuasca",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162027,12 +162160,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -162114,33 +162241,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "21 mai 1919",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162186,12 +162320,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -162236,33 +162364,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le mercredi 21 mai 1919",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le matin du mercredi 21 mai 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162308,12 +162443,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -162364,33 +162493,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un prieur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162418,12 +162554,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -162505,33 +162635,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162565,12 +162702,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -162640,33 +162771,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Coiedii",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Coiedii",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Coiedii",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la famille des Coiedii",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162688,12 +162826,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -162769,33 +162901,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une expédition scientifique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des fouilles",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -162811,12 +162950,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -162897,33 +163030,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "descendants directs de Manco Capac",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "merveilleuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "merveilleuse",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "origine merveilleuse",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "démi-dieux fils du soleil",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fils du soleil",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les souverains incas ont une origine \"merveilleuse\"",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -162939,12 +163079,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -163025,33 +163159,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouet el - Aryan",
-                  "rougeL": 0.8181818181818182
+                  "rougeL": 0.8181818181818182,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouet el - Aryan",
-                  "rougeL": 0.8181818181818182
+                  "rougeL": 0.8181818181818182,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à un kilomètre et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à un kilomètre et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163067,12 +163208,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -163147,33 +163282,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouet el - Aryan",
-                  "rougeL": 0.8181818181818182
+                  "rougeL": 0.8181818181818182,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouet el - Aryan",
-                  "rougeL": 0.8181818181818182
+                  "rougeL": 0.8181818181818182,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à un kilomètre et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à un kilomètres et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à un kilomètre et demi au nord de la pyramide à tranches de Zaouïet el - Aryan",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163213,12 +163355,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -163275,33 +163411,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "accumulation de vestiges de différentes époques",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "accumulation de vestiges de différentes époques",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "accumulation de vestiges de différentes époques",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "accumulation de vestiges de différentes époques",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "accumulation de vestiges de différentes époques",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cette accumulation de vestiges de différentes époques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'accumulation de vestiges de différentes époques.",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163347,12 +163490,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -163421,33 +163558,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son esprit",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son esprit",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son esprit",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son esprit",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le corps de la jeune fille",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son esprit",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'esprit du sacrifié",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163475,12 +163619,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -163549,33 +163687,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163615,12 +163760,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -163674,33 +163813,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en 1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1586",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163740,12 +163886,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -163817,33 +163957,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "modification du milieu de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163871,12 +164018,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -163951,33 +164092,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "modifiant les patrons de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "modification du milieu de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les processus hydrogéomorphologiques.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -163993,12 +164141,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -164079,33 +164221,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la coiffure masculine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -164151,12 +164300,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -164201,33 +164344,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "d'une seule pièce",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "différences sociales",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les différences sociales s' expriment principalement au niveau de la qualité du tissu employé pour leur confection.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -164255,12 +164405,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -164329,42 +164473,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les raisons de cette évolution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les raisons de cette évolution, à laquelle on assiste pour la grande majorité des villes de la Gaule romaine pendant l' Antiquité tardive",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les raisons de cette évolution, à laquelle on assiste pour la grande majorité des villes de la Gaule romaine pendant l' Antiquité tardive",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "raisons de cette évolution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "raisons de cette évolution",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les raisons de cette évolution.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -164458,42 +164603,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' aile gauche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'aile gauche de la flotte de ce dernier",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -164586,33 +164732,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en septembre 1997,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en septembre 1997",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -164658,12 +164811,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -164714,42 +164861,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un confort",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "confort",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "système de drainage des eaux usées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un confort",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un système de drainage des eaux usées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un confort probablement inventé par cette civilisation",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un système de drainage des eaux usées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -164849,33 +164997,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Santo António de Tanna",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -164903,12 +165058,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -164983,33 +165132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "escadre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la frégate",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165037,12 +165193,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -165129,33 +165279,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un parti clandestin",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Erevan a créé son mémorial en souvenir du génocide.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165195,12 +165352,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -165269,33 +165420,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mémorial",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mémorial",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mémorial en souvenir du génocide",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son mémorial",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Erevan a construit son mémorial en souvenir du génocide.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -165311,12 +165469,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -165397,33 +165549,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les caractères germaniques des statuts grecques et romaines.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165439,12 +165598,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -165525,33 +165678,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous les trois ans",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1953",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165573,12 +165733,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -165648,33 +165802,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous les trois ans",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous les trois ans depuis 1953",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous les trois",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trois ans",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous les trois ans",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les trois ans",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165720,12 +165881,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -165777,33 +165932,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' expression de la peur",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'expression de la peur",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165819,12 +165981,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -165899,33 +166055,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' expression de la peur",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "peur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'expression de la peur",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -165965,12 +166128,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -166039,33 +166196,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -166105,12 +166269,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -166170,33 +166328,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le site archéologique de Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Briga intemporelle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Briga",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le site archéologique de Briga.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -166236,12 +166401,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -166295,33 +166454,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Kenya",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tanzanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kenya (KE)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "KE - 1",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Kenya (KE) : • _ KE - 1 – Lac Turkana",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -166355,12 +166521,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -166417,42 +166577,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "australopithèques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "hominines",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "hominines",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "hominines",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sable et roches",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au fond du lac Turkana, on trouve les principaux sites de découverte d'hominines, notamment des australopithèques.",
-                  "rougeL": 0.32
+                  "rougeL": 0.32,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -166575,33 +166736,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "capitaine Léon de Beylié",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Léon de Beylié",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -166635,12 +166803,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -166715,33 +166877,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "âge du cuivre ou Chalcolithique et précède l' âge du fer",
-                  "rougeL": 0.1818181818181818
+                  "rougeL": 0.1818181818181818,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' Amérique latine où les civilisations précolombiennes connurent une métallurgie de l' or et du cuivre jusqu' à la conquête espagnole",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "âge du bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "métallurgie de l' or et du cuivre jusqu' à la conquête espagnole.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' or et du cuivre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' âge du bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la période de l'âge du bronze",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -166757,12 +166926,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -166844,33 +167007,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "moellons de petit appareil et les terres cuites (tubuli, fragments de briques ou de tuiles)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -166880,12 +167050,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -166966,33 +167130,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "moellons de petit appareil et les terres cuites (tubuli, fragments de briques ou de tuiles)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "moellons de petit appareil et les terres cuites (tubuli, fragments de briques ou de tuiles)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "moellons de petit appareil et les terres cuites (tubuli, fragments de briques ou de tuiles)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les moellons de petit appareil et les terres cuites",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167032,12 +167203,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -167106,33 +167271,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167166,12 +167338,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -167240,33 +167406,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Dion",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le récit de Dion",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167306,12 +167479,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -167368,33 +167535,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167434,12 +167608,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -167491,33 +167659,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les anciens rois de la région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167533,12 +167708,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -167620,33 +167789,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167680,12 +167856,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -167742,33 +167912,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En 1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En 1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1876",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -167808,12 +167985,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -167888,33 +168059,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chauves-souris",
-                  "rougeL": 0.1739130434782609
+                  "rougeL": 0.1739130434782609,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chauves-souris",
-                  "rougeL": 0.1739130434782609
+                  "rougeL": 0.1739130434782609,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chauves-souris",
-                  "rougeL": 0.1739130434782609
+                  "rougeL": 0.1739130434782609,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chauves-souris",
-                  "rougeL": 0.1739130434782609
+                  "rougeL": 0.1739130434782609,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "9 ou 12 millions d' individus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chauves-souris de l' Indiana, chauve-souris cendrée, petite chauve-souris brune, grande Chauve-souris brune et pipistrelle de l' Est",
-                  "rougeL": 0.975609756097561
+                  "rougeL": 0.975609756097561,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ces espèces",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -167960,12 +168138,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -168016,33 +168188,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "février 1678",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La construction du bateau débute à Bassein en février 1678.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -168070,12 +168249,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -168145,33 +168318,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au départ des oursins",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au départ des oursins",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au départ des oursins",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "oursins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "oursins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "oursins",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des oursins.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -168193,12 +168373,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -168274,33 +168448,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux narguilés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "renfort pour la garnison du fort Jesus",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux narguilés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux narguilés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "narguilés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux narguilés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les narguilés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -168322,12 +168503,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -168402,33 +168577,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans la Cité ou à ses abords immédiats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -168468,12 +168650,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -168530,33 +168706,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par une intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "grâce à une intervention militaire athénienne",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -168602,12 +168785,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -168652,33 +168829,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "militaire athénienne",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une intervention militaire athénienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -168724,12 +168908,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -168780,33 +168958,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Attale Ier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -168978,48 +169163,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -169232,42 +169375,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "routes romaines constituant un système défensif",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ancien réseau de routes romaines constituant un système défensif",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des techniques de reconnaissance aérienne permettront de retrouver le tracé d' un ancien réseau de routes romaines constituant un système défensif",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ancien réseau de routes romaines",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la déclaration de guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "techniques de reconnaissance aérienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'un ancien réseau de routes romaines constituant un système défensif.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -169354,33 +169498,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ancien réseau de routes romaines constituant un système défensif",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ancien réseau de routes romaines constituant un système défensif",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Irak et la Jordanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des techniques de reconnaissance aérienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'Irak et la Jordanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "T. E. Lawrence",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -169390,12 +169541,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -169488,33 +169633,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "objets très éloignés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "objets très éloignés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "arbrisseau, pas un buisson",
-                  "rougeL": 0.2
+                  "rougeL": 0.2,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des objets très éloignés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' horizon y est vaste et rien ne cache à la vue des objets très éloignés.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des objets très éloignés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les objets très éloignés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -169524,12 +169676,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -169629,33 +169775,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -169695,12 +169848,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -169757,33 +169904,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur un gué",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur un gué",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur un gué",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sur un gué qui offre en son centre un îlot central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur un gué qui offre en son centre un îlot central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur un gué qui offre en son centre un îlot central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur un gué",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -169793,12 +169947,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -169885,33 +170033,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "partage de l' Empire",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lors du partage de l' Empire à la suite de la mort du fils d' Alexandre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "partage de l' Empire à la suite de la mort du fils d' Alexandre",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "partage de l' Empire",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "après le partage de l' Empire à la suite de la mort du fils d' Alexandre",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "On citera par exemple Cassandre ou Ptolémée Ier.",
-                  "rougeL": 0.09523809523809525
+                  "rougeL": 0.09523809523809525,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "lors du partage de l' Empire",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -169927,12 +170082,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -170013,33 +170162,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170055,12 +170211,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -170135,33 +170285,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Pincevent",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170201,12 +170358,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -170275,33 +170426,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170335,12 +170493,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -170409,33 +170561,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Gaulois",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les Gaulois.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170457,12 +170616,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -170549,33 +170702,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Viracocha",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Chancas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170615,12 +170775,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -170691,33 +170845,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "hydrogéomorphologiques",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par la modification de son milieu de croissance (variation du niveau du sol, inclinaison ou ploiement de la tige).",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "modifiant les patrons de croissance (répartition, diversité spécifique)",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les processus hydrogéomorphologiques.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170751,12 +170912,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -170825,33 +170980,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "modifiant les patrons de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "modifiant les patrons de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "patrons de croissance",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par la modification de son milieu de croissance (variation du niveau du sol, inclinaison ou ploiement de la tige).",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les processus hydrogéomorphologiques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -170861,12 +171023,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -170965,33 +171121,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le nom du village a donné son nom à la période qui a progressivement, au XXe siècle, trouvé ses limites historiques, géographiques et culturelles",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "limites historiques, géographiques et culturelles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le nom du village a donné son nom à la période qui a progressivement, au XXe siècle, trouvé ses limites historiques, géographiques et culturelles",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le nom du village a donné son nom",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le nom de la culture néolithique",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son nom",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le nom du village a donné son nom à la période.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171013,12 +171176,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -171093,33 +171250,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Portugal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vestiges d' enceintes vitrifiées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "enceintes vitrifiées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vestiges d' enceintes vitrifiées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Portugal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vestiges d' enceintes vitrifiées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Des vestiges d' enceintes vitrifiées ont été découverts en Europe du Sud.",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171165,12 +171329,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -171215,42 +171373,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Portugal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "enceintes vitrifiées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "enceintes vitrifiées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "enceintes vitrifiées",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Portugal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "murs vitrifiés",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des vestiges d'enceintes vitrifiées",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -171343,33 +171502,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171415,12 +171581,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -171465,33 +171625,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Hittites",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171501,12 +171668,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -171593,33 +171754,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "radiocarbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "radiocarbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "radiocarbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "radiocarbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "radiocarbone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "datations radiocarbone",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les datations radiocarbone",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171659,12 +171827,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -171721,33 +171883,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grec",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Érétrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la colonisation grecque",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -171793,12 +171962,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -171849,33 +172012,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une tour, une tombe, une stèle funéraire et le Grand autel de Pergame.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Pergame",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Pergame",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -171921,12 +172091,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -171971,33 +172135,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tour, une tombe, une stèle funéraire et le Grand autel de Pergame.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tour, tombe, stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une tour, une tombe, une stèle funéraire et le Grand autel de Pergame",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -172013,12 +172184,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -172117,33 +172282,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "leurs terres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -172183,12 +172355,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -172245,33 +172411,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' opinion que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ce sont des bâtiments commerciaux",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -172287,12 +172460,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -172367,33 +172534,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' opinion que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' opinion que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' opinion que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' opinion que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "que ce sont des bâtiments commerciaux où les poteries et leur contenu changeaient de main est exprimé par plusieurs chercheurs.",
-                  "rougeL": 0.8181818181818182
+                  "rougeL": 0.8181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -172403,12 +172577,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -172495,33 +172663,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hans Rienerth",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -172555,12 +172730,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -172624,74 +172793,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Érétrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.2222222222222222
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chalcidique et en Occident",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Grecs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la colonisation grecque",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5555555555555556
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -172996,20 +173134,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "question": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Question douteuse"
-                  }
-                ],
                 "no_answer": [
                   {
                     "annot": "annot_5",
@@ -173058,42 +173182,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Chalcidique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Chalcidique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grecque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chalcidique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Érétrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Chalcidique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -173210,33 +173335,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tefnakht de Saïs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tefnakht et ses troupes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Piyé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tefnakht",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Piyé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Piyé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Piyé enlève Memphis suite à de violents combats.",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -173264,12 +173396,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -173335,33 +173461,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "détruit les fortifications comme on l' exigeait et reprirent le combat avec l' aide des Romains",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "détruire les fortifications",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "détruire les fortifications",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "reddition de la population",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de détruire les fortifications",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "détruire les fortifications",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de détruire les fortifications",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -173389,12 +173522,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -173458,33 +173585,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les fortifications",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -173530,12 +173664,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -173587,33 +173715,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bernardino de Sahagún",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bernardino de Sahagn",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bernardino de Sahagn",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bernardino de Sahagún",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bernardino de Sahagún",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bernardino de Sahagún",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Bernardino de Sahagún",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -173635,12 +173770,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -173717,33 +173846,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "comté d' Edmonson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Edmonson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le comté d' Edmonson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les villes avoisinantes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le comté d'Edmonson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -173783,12 +173919,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -173846,33 +173976,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Parc National de Mammoth Cave",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Parc National de Mammoth Cave",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Mammoth Cave",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le parc national de Mammoth Cave",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -173918,12 +174055,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -173993,33 +174124,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "expédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' expédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' expédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' expédition vers Khotan",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "expédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' expédition",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'expédition prend le départ de Bandipur au Cachemire à la fin du mois de mai 1900.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174065,12 +174203,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -174122,33 +174254,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "si l' on a à faire à une cité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' agencement des bâtiments",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "si l' on a à faire à une cité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "si l' on a à faire à une cité,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "si l'on a à faire à une cité",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "si l' on a à faire à une cité",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "si l'on a affaire à une cité",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174164,12 +174303,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -174250,33 +174383,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "inspiration platonicienne",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174310,12 +174450,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -174372,33 +174506,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "platonicienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Platon",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Platon",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Platon",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174408,12 +174549,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -174500,42 +174635,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "riz",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "riz",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "riz",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "parfois des traces de riz",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la pâte de la céramique contient parfois des traces de riz",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -174622,33 +174758,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "riz",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des traces de riz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174670,12 +174813,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -174750,33 +174887,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rien ne cache à la vue",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "rien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' horizon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pas un arbrisseau, pas un buisson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "rien ne cache",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174822,12 +174966,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -174878,33 +175016,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "combat de cavalerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "César",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "César.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -174926,12 +175071,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -175003,33 +175142,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "George",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "George",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "George",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Harrison Ford",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "George Lucas ?",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Steven Spielberg",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Steven Spielberg",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -175069,12 +175215,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -175131,33 +175271,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "datation donnée par la résonance de spin électronique",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "compatible avec la datation",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"la datation\".",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -175173,12 +175320,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -175253,33 +175394,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "indice d' ancienneté en contradiction avec les dates absolues obtenues par la thermoluminescence",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "compatible avec la datation donnée",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "datation donnée par la biostratigraphie",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "datation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"donner la datation\".",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -175313,12 +175461,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -175393,33 +175535,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quitter Hattusa",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hattusa",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Samuha",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gasgas",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Samuha",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le pouvoir royal est alors considérablement affaibli, proche de sa perte",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le pouvoir royal",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -175465,12 +175614,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -175525,33 +175668,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cartes de base de pays entiers.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -175591,12 +175741,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -175647,33 +175791,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cartes de base de pays entiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la photogrammétrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la photographie aérienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un emploi de plus en plus systématique de la photogrammétrie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la photogrammétrie pour réaliser les cartes de base de pays entiers.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -175701,12 +175852,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -175793,33 +175938,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Artémis",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la déesse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -175841,12 +175993,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -175921,33 +176067,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "d' une matière plus ou moins poreuse et réalisées dans une argile d' une dureté bien faible",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une argile d'une dureté bien faible",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -175981,12 +176134,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -176043,33 +176190,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "argile d' une dureté bien faible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans une argile d' une dureté bien faible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans une argile d' une dureté bien faible",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans une argile",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "d' une matière plus ou moins poreuse et réalisées dans une argile d' une dureté bien faible.",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "réalisées dans une argile d' une dureté bien faible",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une argile d'une dureté bien faible",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176085,12 +176239,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -176171,33 +176319,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "durant la Seconde Guerre mondiale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tué durant la Seconde Guerre mondiale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tué durant la Seconde Guerre mondiale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "rivière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sa vie durant la Seconde Guerre mondiale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la vie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la \"rivière perdue\"",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -176243,12 +176398,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -176300,33 +176449,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "de nombreux objets, éléments d' architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de nombreux objets, éléments d' architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de nombreux objets, éléments d'architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site",
-                  "rougeL": 0.8695652173913043
+                  "rougeL": 0.8695652173913043,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de nombreux objets, éléments d' architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de nombreux objets, éléments d' architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "objets, éléments d' architecture et sculptures issus des fouilles archéologiques réalisées dans et autour du site",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de nombreux objets, éléments d'architecture et sculptures issus des fouilles archéologiques",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176366,12 +176522,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -176428,42 +176578,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à partir d' actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à partir d' actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à partir d' actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à partir d' actes et de registres des titulaires de charges publiques de son époque.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à partir d'actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.7777777777777777
+                  "rougeL": 0.7777777777777777,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"bien connue à partir d'actes et de registres des titulaires de charges publiques de son époque\".",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -176551,33 +176702,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' actes et de registres des titulaires de charges publiques de son époque.",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à partir d'actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.7777777777777777
+                  "rougeL": 0.7777777777777777,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "actes et de registres des titulaires de charges publiques de son époque",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à partir d'actes et de registres des titulaires de charges publiques de son époque.",
-                  "rougeL": 0.7777777777777777
+                  "rougeL": 0.7777777777777777,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176623,12 +176781,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -176680,33 +176832,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "effets personnels",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176722,12 +176881,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -176802,33 +176955,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "officiers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du mess des officiers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176868,12 +177028,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -176942,33 +177096,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "al - Mansur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Al-Mansur",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -176990,12 +177151,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -177082,33 +177237,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pompéi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "art hellénistique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pompéi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -177142,12 +177304,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -177216,33 +177372,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pompéi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Pompéi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pompéi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le propriétaire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -177264,12 +177427,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -177344,33 +177501,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Même avant l' arrivée des Espagnols eux-mêmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Même avant l' arrivée des Espagnols eux-mêmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Même avant l' arrivée des Espagnols eux-mêmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "avant l' arrivée des Espagnols",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "avant l'arrivée des Espagnols eux-mêmes",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Même avant l' arrivée des Espagnols eux-mêmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "avant l'arrivée",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -177398,12 +177562,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -177472,33 +177630,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lorsque les ossements auront été complètement dégagés de leur gangue de brèche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lorsque les ossements auront été complètement dégagés de leur gangue de brèche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ossements auront été complètement dégagés de leur gangue de brèche",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lorsque les ossements auront été complètement dégagés de leur gangue de brèche",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "dégagés de leur gangue de brèche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dégagés de leur gangue de brèche",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une étude publiée en 2015",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -177532,12 +177697,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -177618,33 +177777,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Félix Voulot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une mosaque de 232 m2 a été découverte en 1883 par Félix Voulot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Félix Voulot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Félix Voulot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Félix Voulot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mosaïque",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mosaïque de Grand",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -177660,12 +177826,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -177748,33 +177908,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -177802,12 +177969,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -177870,33 +178031,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des observateurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -177930,12 +178098,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -177998,33 +178160,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' Abbé Baudry, puis le Dr Marcel Baudouin",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' Abbé Baudry, puis le Dr Marcel Baudouin",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "puits à eau comblés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Abbé Baudry, puis le Dr Marcel Baudouin",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Abbé Baudry",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "puits à eau comblés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Abbé Baudry et le Dr Marcel Baudouin",
-                  "rougeL": 0.8148148148148148
+                  "rougeL": 0.8148148148148148,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178046,12 +178215,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -178127,33 +178290,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Andrea Palladio et Antonia da Sangallo le Jeune.",
-                  "rougeL": 0.896551724137931
+                  "rougeL": 0.896551724137931,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178193,12 +178363,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -178252,33 +178416,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des architectes comme Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.967741935483871
+                  "rougeL": 0.967741935483871,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Andrea Palladio et Antonio da Sangallo le Jeune.",
-                  "rougeL": 0.9655172413793104
+                  "rougeL": 0.9655172413793104,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178318,12 +178489,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -178395,33 +178560,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le peintre des Enfers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178455,12 +178627,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -178523,33 +178689,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Kirghizes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kirghizes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Kirghizes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sarikol",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Kirghizes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Stein",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'expédition ou Ferdinand von Richthofen",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -178559,12 +178732,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -178651,33 +178818,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour suivre les cours de l' école d' état-major",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour suivre les cours de l'école d'état-major",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178705,12 +178879,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -178773,33 +178941,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "école d' état-major",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les cours de l' école d' état-major",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' école d' état-major",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les cours de l' école d' état-major",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "école d' état-major",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les cours de l' école d' état-major",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les cours de l'école d'état-major",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -178845,12 +179020,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -178913,42 +179082,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "peuple des Santons",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le nom d' origine de la ville",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les contemporains",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mediolanum Santonum",
-                  "rougeL": 0.39999999999999997
+                  "rougeL": 0.39999999999999997,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mediolanum Santonum (Saintes)",
-                  "rougeL": 0.3529411764705882
+                  "rougeL": 0.3529411764705882,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' oppidum",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Pons",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -179059,33 +179229,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Praxitèle jeune",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Artémis",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Artémis de Dresde",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la déesse (Artémis)",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179125,12 +179302,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -179187,33 +179358,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "offrandes et sacrifice",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "feuilles de coca",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ayahuasca qui a des effets hallucinogènes en affectant le système nerveux central.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ayahuasca",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'ayahuasca",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179229,12 +179407,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -179315,33 +179487,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dion Cassius",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "combat de cavalerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Il",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "César",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179357,12 +179536,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -179443,33 +179616,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur la Seine gelée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179515,12 +179695,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -179571,33 +179745,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179637,12 +179818,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -179693,33 +179868,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 26 octobre 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179747,12 +179929,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -179821,33 +179997,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au début du VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "début du VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au début du VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "première moitié du Ier siècle",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans la première moitié du Ier siècle",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "première moitié du Ier siècle",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le pont du Gard est utilisé dans la première moitié du Ier siècle.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179857,12 +180040,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -179943,33 +180120,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "VIe siècle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "VIe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "VIe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ier siècle",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ve siècle",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ier siècle",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au début du VIe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -179979,12 +180163,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -180071,33 +180249,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Londres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "site",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il quitta donc le site",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -180107,12 +180292,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -180199,33 +180378,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Une autre motte féodale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Une autre motte féodale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Une autre motte féodale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "motte féodale",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une motte féodale",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une motte féodale",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une autre motte féodale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -180265,12 +180451,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -180327,33 +180507,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "divers programmes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Par divers programmes.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -180381,12 +180568,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -180455,33 +180636,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bord d' un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au bord d' un lac",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au bord d' un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "au bord d' un lac",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 0.962962962962963
+                  "rougeL": 0.962962962962963,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au bord d' un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au bord d'un lac",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -180515,12 +180703,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -180580,33 +180762,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 0.962962962962963
+                  "rougeL": 0.962962962962963,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 0.962962962962963
+                  "rougeL": 0.962962962962963,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 0.962962962962963
+                  "rougeL": 0.962962962962963,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "au bord d' un lac",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mexico",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un lac qui occupait la plaine sur laquelle s' étend maintenant la banlieue de Mexico",
-                  "rougeL": 0.962962962962963
+                  "rougeL": 0.962962962962963,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le lac",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -180634,12 +180823,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -180711,33 +180894,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1961",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1961.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -180765,12 +180955,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -180833,33 +181017,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1958",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -180887,12 +181078,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -181021,33 +181206,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Il",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181093,12 +181285,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -181203,42 +181389,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "il décide de céder les biens de sa propre famille, et fait lui-même le boniment de la marchandise",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Caligula",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -181343,33 +181530,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le panneau.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181385,12 +181579,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -181477,33 +181665,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "panneau de marbre avec la figure sculptée d' un géant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "panneau de marbre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le panneau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181549,12 +181744,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -181605,33 +181794,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181671,12 +181867,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -181727,33 +181917,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nerveux central",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nerveux central",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nerveux central",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "divination",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "système nerveux central",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le système nerveux central.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181793,12 +181990,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -181855,33 +182046,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les caractères germaniques des statuts grecques et romaines.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -181897,12 +182095,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -181977,33 +182169,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "germaniques des statuts grecques et romaines",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "caractères germaniques des statuts grecques et romaines",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "germaniques des statuts grecques et romaines",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "caractères germaniques",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les caractères germaniques des statuts grecques et romaines.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -182043,12 +182242,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -182105,33 +182298,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une configuration, à savoir celle d' un bateau tombe.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -182165,12 +182365,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -182228,33 +182422,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Basil Brown",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -182294,12 +182495,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -182357,33 +182552,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en 1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1976 durant la Révolution culturelle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -182411,12 +182613,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -182485,33 +182681,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1976 durant la Révolution culturelle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1976",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "1976 durant la Révolution culturelle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1976 and phase finale (v. 1250 - 1046)",
-                  "rougeL": 0.15384615384615383
+                  "rougeL": 0.15384615384615383,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1976 durant la Révolution culturelle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -182533,12 +182736,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -182613,33 +182810,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1924",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1924",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1924",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(1854 - 1924),",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1924.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1924",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Manuel Luque meurt dans cette ville.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -182661,12 +182865,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -182737,42 +182935,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1854",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1854 - 1924",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1854 - 1924",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(1854 - 1924),",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1924.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1924",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas d'information dans l'article sur le moment où Manuel Luque meurt dans cette ville.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -182879,33 +183078,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pendant les trois ans qu' il passe dans ce pays, il s' adonne surtout à des recherches sur les antiquités et sur les anciennes doctrines religieuses du Moyen-Orient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ce séjour",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La mission Gardanne est alors abandonnée",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la protection de son oncle",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des recherches sur les antiquités et sur les anciennes doctrines religieuses du Moyen-Orient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "recherches sur les antiquités et sur les anciennes doctrines religieuses du Moyen-Orient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le séjour dans le pays et les recherches sur les antiquités et les anciennes doctrines religieuses du Moyen-Orient.",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -182945,12 +183151,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -183013,33 +183213,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pendant les trois ans qu' il passe dans ce pays, il s' adonne surtout à des recherches sur les antiquités et sur les anciennes doctrines religieuses du Moyen-Orient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ce séjour",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La mission Gardanne est alors abandonnée",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ce séjour décide de la vocation scientifique du jeune Lajard. Pendant les trois ans qu' il passe dans ce pays,",
-                  "rougeL": 0.14285714285714288
+                  "rougeL": 0.14285714285714288,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des recherches sur les antiquités et sur les anciennes doctrines religieuses du Moyen-Orient",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "séjour dans ce pays",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le séjour dans le pays",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -183055,12 +183262,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -183159,33 +183360,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Son architecte personnel",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183207,12 +183415,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -183287,33 +183489,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une belle mosaïque au labyrinthe",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une belle mosaque au labyrinthe",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une belle mosaque au labyrinthe",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une belle mosaïque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mosaïque au labyrinthe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une belle mosaïque au labyrinthe",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une belle mosaïque au labyrinthe",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183359,12 +183568,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -183409,33 +183612,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mosaïque au labyrinthe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mosaque au labyrinthe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mosaque",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mosaïque",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mosaïque au labyrinthe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mosaïque au labyrinthe",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une belle mosaïque au labyrinthe",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183469,12 +183679,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -183537,33 +183741,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183591,12 +183802,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -183659,33 +183864,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183713,12 +183925,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -183781,33 +183987,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 25 février 1917",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183847,12 +184060,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -183909,33 +184116,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2003,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2003 : découverte des ponts antiques",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "2003 : découverte des ponts antiques.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -183957,12 +184171,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -184032,33 +184240,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2003,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2003",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "2003 : découverte des ponts antiques",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -184080,12 +184295,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -184161,33 +184370,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Cet homme vivait au milieu de forêts et de prairies, où il pratiquait chasse et pêche",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Homme de Zhoukoudian",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Homme de Zhoukoudian",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' Homme de Zhoukoudian",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Homo sapiens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Homme de Zhoukoudian",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Cet homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -184203,12 +184419,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -184292,33 +184502,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Plusieurs poignards en cuivre",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plusieurs poignards en cuivre supposés d' origine Rinaldone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plusieurs poignards en cuivre",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poignards en cuivre supposés d' origine Rinaldone",
-                  "rougeL": 0.9523809523809523
+                  "rougeL": 0.9523809523809523,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Plusieurs poignards en cuivre supposés d'origine Rinaldone ont même été découverts en Suisse.",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -184352,12 +184569,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -184414,33 +184625,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Plusieurs poignards en cuivre supposés d' origine Rinaldone",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poignards en cuivre",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Plusieurs poignards en cuivre",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -184480,12 +184698,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -184560,33 +184772,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux plongeurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Santo António de Tanna",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux plongeurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Santo António de Tanna,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Santo António de Tanna",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Santo António de Tanna, sous le commandement du capitaine Domingos Pereira de Gusman",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La Santo António de Tanna",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -184602,12 +184821,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -184696,33 +184909,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -184762,12 +184982,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -184818,33 +185032,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "forestier",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "forestier",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "forestier",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "forestier mais aussi de clairières",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mélange de couvert forestier",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -184872,12 +185093,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -184946,33 +185161,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "professeur de sa discipline au collège de France où il est titulaire de la chaire de Paléoanthropologie et de préhistoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "professeur de sa discipline au collège de France où il est titulaire de la chaire de Paléoanthropologie et de préhistoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il est directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "professeur de sa discipline au collège de France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "professeur de sa discipline",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -185018,12 +185240,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -185068,33 +185284,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "collège de France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -185128,12 +185351,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -185190,33 +185407,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "musées",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "musées",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "différents musées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "musées",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de différents musées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "différents musées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de différents musées",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -185226,12 +185450,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -185318,33 +185536,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un autre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un autre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un autre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un autre, sachant écrire,",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un autre, sachant écrire",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un autre, sachant écrire",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un autre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -185384,12 +185609,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -185448,33 +185667,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "installation fixe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fixe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fixe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "installation fixe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "fixe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fixe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le four devient une installation fixe.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -185490,12 +185716,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -185576,33 +185796,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il existe à l'extérieur du cercle de sarsen deux autres cercles un peu irréguliers",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -185636,12 +185863,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -185700,33 +185921,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux autres cercles un peu irréguliers de chacun 30 grandes cavités",
-                  "rougeL": 0.7058823529411764
+                  "rougeL": 0.7058823529411764,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "deux autres cercles",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -185766,12 +185994,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -185830,33 +186052,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "écrivent des choses sur ce peuple",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "écrivent des choses sur ce peuple",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "choses sur ce peuple : cette période de transition « entre » la Préhistoire et l' Histoire s' appelle la Protohistoire",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "écrivent des choses sur ce peuple",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Qu'un peuple ne connaisse pas l'écriture, mais qu'un autre, sachant écrire, écrivent des choses sur ce peuple.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "écrivent des choses sur ce peuple",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -185872,12 +186101,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -185970,33 +186193,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois chasseurs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.2
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "llama-2_lora": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "GPT-3.5": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 }
               },
               "human_annot": {
@@ -186168,48 +186398,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -186423,33 +186611,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des randonneurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -186465,12 +186660,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -186551,33 +186740,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fin de l' âge de la pierre vers 6000 av. J.-C.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fin de l' âge de la pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à la fin de l' âge de la pierre vers 6000 av. J.-C.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fin de l' âge de la pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'âge de la pierre vers 6000 av. J.-C.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la fin de l'âge de la pierre",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la fin de l' âge de la pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -186623,12 +186819,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -186679,33 +186869,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les hommes de l'Aurignacien.",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -186745,12 +186942,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -186802,33 +186993,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Aurignacien",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les hommes de l'Aurignacien",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -186868,12 +187066,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -186931,33 +187123,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1884",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pléistocène (quaternaire)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "du pléistocène (quaternaire)",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -186967,12 +187166,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -187053,42 +187246,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "En 1884",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1884",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pléistocène",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1884",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans les alluvions de la Somme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il découvre dans les alluvions de la Somme des outils en silex qu'il date du pléistocène (quaternaire).",
-                  "rougeL": 0.34782608695652173
+                  "rougeL": 0.34782608695652173,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -187193,33 +187387,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "site archéologiques situés en France",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sites archéologiques situés en France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "France, où on a pour la première fois identifié une « culture » particulière.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en France",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187259,12 +187460,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -187322,33 +187517,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à l' entrée des grottes et des cavernes.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'entrée des grottes et des cavernes",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187382,12 +187584,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -187444,33 +187640,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "grottes et des cavernes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'entrée des grottes",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -187510,12 +187713,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -187572,33 +187769,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187614,12 +187818,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -187700,33 +187898,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Édouard Piette est mort à Rumigny.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187754,12 +187959,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -187824,33 +188023,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rumigny",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187884,12 +188090,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -187954,33 +188154,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le petit sac de cuir contient de petits outils de silex (grattoir, perçoir, lame pointue).",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -187990,12 +188197,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -188076,33 +188277,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -188142,12 +188350,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -188198,78 +188400,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un nécessaire à faire du feu (amadou, silex) et des petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.35
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -188644,33 +188811,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de petits outils de silex",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -188698,12 +188872,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -188772,33 +188940,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sac de cuir",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un petit sac de cuir contenant de petits outils de silex (grattoir, perçoir, lame pointue).",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un petit sac de cuir contenant de petits outils de silex (grattoir, perçoir, lame pointue)",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -188826,12 +189001,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -188900,33 +189069,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des grattoirs carénés qui servent à gratter les peaux, burins carénés et burins busqués",
-                  "rougeL": 0.6923076923076924
+                  "rougeL": 0.6923076923076924,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "burins busqués",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -188948,12 +189124,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -189026,33 +189196,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189086,12 +189263,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -189158,33 +189329,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "être humain ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à un être humain ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tzi tzi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un être humain ancien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ötzi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ötzi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le nom a été donné à Ötzi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -189218,12 +189396,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -189304,33 +189476,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "arc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "arc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La réalisation de propulseur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' arc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' arc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "arc",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'arc",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189346,12 +189525,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -189433,33 +189606,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Hommes de la Préhistoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189493,12 +189673,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -189562,33 +189736,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189622,12 +189803,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -189684,33 +189859,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "30000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "30000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "30000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers 30000",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189726,12 +189908,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -189812,33 +189988,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trois",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trois",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trois",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "trois grandes périodes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trois",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "trois grandes périodes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189866,12 +190049,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -189940,33 +190117,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la formation du calcaire et des fossiles du nord-est de la France,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -189982,12 +190166,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -190062,42 +190240,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "formation du calcaire et des fossiles du nord-est de la France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "calcaire et des fossiles du nord-est de la France,",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "géologie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la formation du calcaire et des fossiles",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le calcaire et les fossiles du nord-est de la France",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -190190,33 +190369,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle en Dorogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle en Dordogne,",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "homme de Combe - Capelle",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.4285714285714285
+                  "rougeL": 0.4285714285714285,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190262,12 +190448,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -190312,33 +190492,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle en Dorogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "homme retrouvé à Combe - Capelle",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' homme retrouvé à Combe - Capelle en Dordogne,",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.4285714285714285
+                  "rougeL": 0.4285714285714285,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme retrouvé à Combe - Capelle en Dordogne",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'homme retrouvé à Combe-Capelle en Dordogne.",
-                  "rougeL": 0.4285714285714285
+                  "rougeL": 0.4285714285714285,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190348,12 +190535,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -190440,33 +190621,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un mélange de couvert forestier mais aussi de clairières",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190482,12 +190670,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -190568,33 +190750,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Denis Peyrony a découvert la grotte en 1901.",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190628,12 +190817,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -190690,33 +190873,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1901. La grotte à été découverte en 1901 par Denis Peyrony, elle est classé monument historique l' année suivante.",
-                  "rougeL": 0.125
+                  "rougeL": 0.125,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1901",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190756,12 +190946,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -190818,33 +191002,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -190854,12 +191045,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -190946,33 +191131,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "culture",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.75
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils de pierre ou d' os",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 },
                 "GPT-3.5": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -191102,48 +191294,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -191398,33 +191548,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "décrivit les outils trouvés sur le site",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les outils trouvés",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -191458,12 +191615,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -191520,33 +191671,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "décrivit les outils trouvés sur le site",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les outils trouvés",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -191568,12 +191726,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -191648,33 +191800,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la population",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -191708,12 +191867,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -191777,33 +191930,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "de plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelquefois de plusieurs dizaines de kilomètres",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de plusieurs dizaines de kilomètres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -191849,12 +192009,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -191905,33 +192059,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les hommes de l'Aurignacien.",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -191959,12 +192120,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -192028,33 +192183,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Aurignacien",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les hommes de l' Aurignacien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les hommes de l'Aurignacien",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192076,12 +192238,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -192157,33 +192313,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "on ne sait pas",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192229,12 +192392,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -192300,33 +192457,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les guerriers professionnels",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192360,12 +192524,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -192429,33 +192587,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Australopithecus afarensis",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lucy",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Australopithecus afarensis",
-                  "rougeL": 0.6956521739130436
+                  "rougeL": 0.6956521739130436,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' australopithèque",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Lucy (australopithèque)",
-                  "rougeL": 0.3
+                  "rougeL": 0.3,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "fossile de Lucy",
-                  "rougeL": 0.1111111111111111
+                  "rougeL": 0.1111111111111111,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Lucy (australopithèque)",
-                  "rougeL": 0.3
+                  "rougeL": 0.3,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192471,12 +192636,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -192557,33 +192716,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "du magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "magdalénien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192623,12 +192789,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -192685,33 +192845,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le 5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le 5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 5 août 1868.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192745,12 +192912,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -192809,33 +192970,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "5 août 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192869,12 +193037,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -192939,33 +193101,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à l' entrée des grottes et des cavernes.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'entrée des grottes et des cavernes.",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -192999,12 +193168,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -193061,33 +193224,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' entrée des grottes et des cavernes.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à l' entrée des grottes et des cavernes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à l'entrée des grottes et des cavernes",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193109,12 +193279,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -193189,33 +193353,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par les conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les conditions atmosphériques",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193243,12 +193414,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -193317,33 +193482,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "blocs de pierre de plusieurs dizaines de tonnes",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "blocs de pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "blocs de pierre de plusieurs dizaines de tonnes",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "blocs de pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "blocs de pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "blocs de pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "blocs de pierre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193377,12 +193549,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -193446,33 +193612,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "rares",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "rares",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "rares",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "devenues rares",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "rares",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "devenues rares",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "rares",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -193482,12 +193655,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -193574,33 +193741,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans la même région, dans le but de découvrir d' autres fossiles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au Tchad",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193640,12 +193814,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -193703,33 +193871,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "L' Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' Homme",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193769,12 +193944,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -193843,33 +194012,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La Vénus de Willendorf",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Faite de calcaire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La Vénus de Willendorf",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vénus de Willendorf La Vénus de Willendorf",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vénus de Willendorf",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La Vénus de Willendorf",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La Vénus de Willendorf",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -193903,12 +194079,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -193971,42 +194141,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "entre -15500 et -13500 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "entre -15500 et -13500 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "entre -15500 et -13500 ans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "entre -15500 et -13500 ans,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "-15500 et -13500 ans",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "-15500 et -13500 ans",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les dates sont comprises entre -15500 et -13500 ans.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -194100,33 +194271,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Andrewsarchus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -194340,48 +194518,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -194552,33 +194688,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "civilisation danubienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' agriculture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "civilisation danubienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -194612,12 +194755,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -194674,33 +194811,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "civilisation danubienne",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' agriculture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Néolithique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -194740,12 +194884,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -194802,33 +194940,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "producteurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "producteurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "producteurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ils deviennent sédentaires et construisent des villages",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "producteurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deviennent producteurs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ils deviennent sédentaires.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -194838,12 +194983,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -194930,33 +195069,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "avec un propulseur de taille moyenne (75 centimètres)",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "avec un propulseur de taille moyenne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "avec un propulseur de taille moyenne (75 centimètres)",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "avec un propulseur de taille moyenne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "avec un propulseur de taille moyenne (75 centimètres)",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "avec un propulseur de taille moyenne (75 centimètres)",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "envoyée avec un propulseur",
-                  "rougeL": 0.6666666666666665
+                  "rougeL": 0.6666666666666665,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -194990,12 +195136,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -195052,33 +195192,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "propulseur",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "propulseur",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "avec un propulseur de taille moyenne (75 centimètres)",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "projectile de taille moyenne (75 centimètres)",
-                  "rougeL": 0.4000000000000001
+                  "rougeL": 0.4000000000000001,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "propulseur",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "avec un propulseur de taille moyenne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -195100,12 +195247,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -195180,42 +195321,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ce jour -là",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "il y a 15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ce jour -là,",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce jour -là",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ce jour-là",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -195303,33 +195445,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Meruti est un garçon d' une dizaine d' années, vivant il y a 15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' histoire Meruti est un garçon d' une dizaine d' années, vivant il y a 15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "15000 ans",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ce jour -là",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ce jour",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -195357,12 +195506,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -195432,33 +195575,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ses liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour ses liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ses liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "liens possibles avec l' astronomie ancienne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour ses liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour ses liens possibles avec l' astronomie ancienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -195504,12 +195654,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -195560,33 +195704,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la terre pour les murs, voire la pierre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la terre pour les murs",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs maisons peuvent donc être plus solides et plus travaillées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "on y a trouvé de nombreux objets fabriqués par les hommes, et des restes des animaux qu' ils chassaient.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour les murs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Comme ils ne changent plus d' endroit, et que leurs techniques et connaissances sont plus développées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour les murs, voire la pierre.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -195626,12 +195777,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -195689,33 +195834,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grattoir, perçoir, lame pointue",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sac de cuir",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cuir",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -195725,12 +195877,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -195811,33 +195957,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sac de cuir",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "un petit sac de cuir contenant de petits outils de silex (grattoir, perçoir, lame pointue).",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -195847,12 +196000,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -195939,33 +196086,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne (MPFT)",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne (MPFT)",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne (MPFT)",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Mission Paléoanthropologique Franco - Tchadienne (MPFT)",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -195981,12 +196135,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -196067,33 +196215,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196127,12 +196282,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -196189,33 +196338,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196255,12 +196411,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -196317,33 +196467,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une hotte munie d' une armature formée d' une longue tige de noisetier, deux récipients en écorce de bouleau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "hotte munie d' une armature formée d' une longue tige de noisetier, deux récipients en écorce de bouleau, un petit sac comprenant un nécessaire à faire du feu",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "céréales, de cerf et de bouquetin.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le dernier repas d' Ötzi se composait de céréales, de cerf et de bouquetin.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196377,12 +196534,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -196440,33 +196591,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "céréales, de cerf et de bouquetin.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196512,12 +196670,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -196569,33 +196721,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'abbé Breuil",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196629,12 +196788,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -196691,33 +196844,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'abbé Breuil",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196757,12 +196917,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -196819,33 +196973,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "autour de -2300",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "autour de -2300.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "autour de -2300",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le squelette d'un homme de l'âge du bronze a été découvert dans le fossé extérieur et il est mort autour de -2300.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -196867,12 +197028,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -196941,33 +197096,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "-2300.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "-2300",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "autour de -2300.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197007,12 +197169,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -197069,33 +197225,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197117,12 +197280,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -197197,33 +197354,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "âge de la pierre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "âge de la pierre vers 6000 av. J.-C",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "âge de la pierre vers 6000 av. J.-C.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' âge de la pierre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "6000 av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' âge de la pierre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' âge de la pierre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -197251,12 +197415,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -197337,33 +197495,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "coquillages perforés, des objets sculptés en ronde bosse mais aussi des peintures",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fouillée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "coquillages perforés, des objets sculptés en ronde bosse mais aussi des peintures",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "coquillages perforés, des objets sculptés en ronde bosse mais aussi des peintures",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "site éponyme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "site éponyme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La grotte d'Aurignac en Haute-Garonne a donné son nom à cette culture en 1906.",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197397,12 +197562,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -197465,33 +197624,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "or.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197531,12 +197697,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -197587,33 +197747,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' origine principale du bronze",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' origine principale du bronze",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le bronze",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197647,12 +197814,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -197715,33 +197876,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "La fragilité des poteries, surtout celles de terre séchée",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197775,12 +197943,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -197839,33 +198001,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "poteries, surtout celles de terre séchée",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les poteries",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -197911,12 +198080,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -197969,33 +198132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'abbé Breuil",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198029,12 +198199,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -198091,33 +198255,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' abbé Breuil",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198163,12 +198334,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -198219,33 +198384,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Florès",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' île de Florès,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Florès",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'île de Florès",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198267,12 +198439,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -198341,33 +198507,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fossiles de l' Homme de Florès",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fossiles de l' Homme de Florès",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "grottes de Liang Bua",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' île de Florès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'Homme de Florès",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -198383,12 +198556,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -198469,33 +198636,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(une sagaie de 2 mètres de long)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectile (une sagaie de 2 mètres de long)",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198517,12 +198691,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -198592,33 +198760,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(une sagaie de 2 mètres de long)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectile (une sagaie de 2 mètres de long)",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198640,12 +198815,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -198721,33 +198890,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "par ces flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "par ces flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "par ces flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Plusieurs pointes de flèches",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "par ces flèches",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'homme a été tué par des flèches.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -198787,12 +198963,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -198849,33 +199019,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -198909,12 +199086,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -198982,33 +199153,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les ossements (de grande taille) des mammouths",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la peau qui a disparu depuis",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ossements (de grande taille) des mammouths",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les ossements des mammouths",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -199048,12 +199226,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -199105,33 +199277,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les ossements (de grande taille) des mammouths ont été utilisés pour construire l' armature des huttes",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.7
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la peau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.15
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.7
                 },
                 "llama-2_lora": {
                   "answer_pred": "ossements",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.7
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ossements (de grande taille) des mammouths",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les ossements (de grande taille) des mammouths",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -199345,48 +199524,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -199558,33 +199695,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Proche-Orient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Proche-Orient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Au Proche-Orient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Au Proche-Orient, elle commence vers 8000 av. J.-C.,",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers 8000 av. J.-C.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Au Proche-Orient",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au Proche-Orient.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -199612,12 +199756,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -199698,33 +199836,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ces outils étaient probablement utilisés pour tuer des animaux, mais c' est très incertain",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ces outils étaient probablement utilisés pour tuer des animaux, mais c' est très incertain",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "couper, de piquer et de frapper",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "enlevant des éclats sur un bloc de pierre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des bifaces",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "c'est très incertain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -199740,12 +199885,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -199833,33 +199972,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ces outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bifaces",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bifaces",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "coup de poing",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les bifaces",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -199893,12 +200039,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -199962,33 +200102,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200022,12 +200169,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -200088,33 +200229,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Yves Coppens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200148,12 +200296,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -200220,33 +200362,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un petit sac de cuir contenant de petits outils de silex (grattoir, perçoir, lame pointue).",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200262,12 +200411,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -200342,33 +200485,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sac de cuir",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sac de cuir",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un petit sac de cuir contenant de petits outils de silex (grattoir, perçoir, lame pointue).",
-                  "rougeL": 0.3157894736842105
+                  "rougeL": 0.3157894736842105,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans un petit sac de cuir",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200408,12 +200558,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -200470,33 +200614,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un projectile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bouquetin",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "projectile plutôt long et lourd",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le propulseur",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -200512,12 +200663,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -200599,33 +200744,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "diriger des fouilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "professeur de sa discipline au collège de France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il est nommé directeur de différents musées",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200665,12 +200817,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -200721,33 +200867,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "diriger des fouilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -200775,12 +200928,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -200849,42 +200996,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "professeur de sa discipline au collège de France où il est titulaire de la chaire de Paléoanthropologie et de préhistoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "professeur de sa discipline au collège de France où il est titulaire de la chaire de Paléoanthropologie et de préhistoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "professeur de sa discipline au collège de France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -200977,33 +201125,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "-600",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "vers -600",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -201031,12 +201186,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -201105,33 +201254,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le projectile",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(une sagaie de 2 mètres de long)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "projectile (une sagaie de 2 mètres de long)",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une sagaie de 2 mètres de long",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -201147,12 +201303,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -201234,33 +201384,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la première étude sur la grotte et recensa à peu près 200 peintures ou gravures",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "première étude sur la grotte",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "première étude sur la grotte",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la première étude sur la grotte et recensa à peu près 200 peintures ou gravures",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la première étude sur la grotte",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la première étude sur la grotte",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'abbé Breuil réalisa la première étude sur la grotte.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -201288,12 +201445,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -201362,33 +201513,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Moyen-Orient et de l' Asie du Sud-Est",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.8125
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Moyen-Orient et de l' Asie du Sud-Est",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.8125
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Moyen-Orient et de l' Asie du Sud-Est",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.8125
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Moyen-Orient et de l' Asie du Sud-Est",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.8125
                 },
                 "llama-2_lora": {
                   "answer_pred": "Moyen-Orient et de l'Asie du Sud-Est",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 0.8125
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Moyen-Orient, Asie du Sud-Est, Europe, Asie, Afrique, Amériques",
-                  "rougeL": 0.5714285714285714
+                  "rougeL": 0.5714285714285714,
+                  "HScore": 0.8125
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Moyen-Orient et l'Asie du Sud-Est, l'Europe et le reste de l'Asie et de l'Afrique, les Amériques.",
-                  "rougeL": 0.43478260869565216
+                  "rougeL": 0.43478260869565216,
+                  "HScore": 0.875
                 }
               },
               "human_annot": {
@@ -201596,40 +201754,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -201697,16 +201821,6 @@
                     "annot": "annot_10",
                     "rating": "Correct"
                   }
-                ],
-                "question": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Question douteuse"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Question douteuse"
-                  }
                 ]
               },
               "lu_in_question": false,
@@ -201752,33 +201866,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "céréales, de cerf et de bouquetin.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le dernier repas d'Ötzi se composait de céréales, de cerf et de bouquetin.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -201812,12 +201933,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -201875,33 +201990,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "céréales, de cerf et de bouquetin.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "céréales, de cerf et de bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "céréales, cerf et bouquetin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -201923,12 +202045,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -202004,33 +202120,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des outils et savait utiliser le feu",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des outils",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -202052,12 +202175,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -202132,33 +202249,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "obsidienne",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -202186,12 +202310,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -202255,33 +202373,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "néolithiques en obsidienne",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "néolithiques en obsidienne",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "néolithiques en obsidienne",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "objets néolithiques en obsidienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -202315,12 +202440,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -202384,33 +202503,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Europe centrale (civilisation danubienne)",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Europe centrale (civilisation danubienne)",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -202420,12 +202546,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -202506,33 +202626,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Europe centrale (civilisation danubienne)",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Europe centrale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -202548,12 +202675,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -202652,33 +202773,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "grottes ouvertes ou peu profondes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.20000000000000004
+                  "rougeL": 0.20000000000000004,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.20000000000000004
+                  "rougeL": 0.20000000000000004,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "grottes ouvertes ou peu profondes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.20000000000000004
+                  "rougeL": 0.20000000000000004,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "abris-sous-roche",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans des abris-sous-roche",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -202694,12 +202822,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -202780,33 +202902,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "polissage des pierres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "polissage des pierres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à la période",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "polissage des pierres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Néolithique, qui signifie « nouvelle pierre » en grec ancien.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au polissage des pierres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la période",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -202852,12 +202981,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -202908,33 +203031,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des variations. Avant 15000 il était tempéré. Puis il redevient froid et sec, la végétation herbeuse se développe, favorisant les herbivores, l' antilope saga",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "froid (il s' agit de la dernière période des glaciations).",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des variations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -202974,12 +203104,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -203036,33 +203160,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le Châtelperronien (relié à l' homme de Néanderthal), l' Aurignacien, le Gavettien, le Solutréen et le Magdalénien",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "culture",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cultures archéologiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une « culture » particulière",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les noms des cultures archéologiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Châtelperronien, l'Aurignacien, le Gavettien, le Solutréen et le Magdalénien.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203102,12 +203233,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -203169,33 +203294,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un tranchant",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203229,12 +203361,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -203297,33 +203423,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour la traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203351,12 +203484,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -203420,33 +203547,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la traversée de la Seine",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la traversée de la Seine.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203456,12 +203590,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -203549,33 +203677,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203615,12 +203750,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -203671,33 +203800,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de 1846 à 1868",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203731,12 +203867,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -203799,42 +203929,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les anciens explorateurs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -203939,33 +204070,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "poids d' environ 50 kg",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -203999,12 +204137,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -204073,33 +204205,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "poids d' environ 50 kg",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Homo rudolfensis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Aucune séquence de mots dans l'article ne répond à la question posée.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -204127,12 +204266,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -204201,33 +204334,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204261,12 +204401,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -204331,33 +204465,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "diriger des fouilles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "professeur de sa discipline au collège de France",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il est nommé directeur de différents musées.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204373,12 +204514,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -204453,33 +204588,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "directeur de différents musées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204513,12 +204655,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -204581,33 +204717,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grattoirs carénés qui servent à gratter les peaux, burins carénés et burins busqués",
-                  "rougeL": 0.6923076923076924
+                  "rougeL": 0.6923076923076924,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204641,12 +204784,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -204707,33 +204844,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grattoirs carénés qui servent à gratter les peaux, burins carénés et burins busqués",
-                  "rougeL": 0.6923076923076924
+                  "rougeL": 0.6923076923076924,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "burins carénés et burins busqués",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204761,12 +204905,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -204839,33 +204977,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Breuil",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abée Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'abbé Breuil",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -204899,12 +205044,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -204967,33 +205106,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -205027,12 +205173,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -205091,33 +205231,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.95
                 },
                 "Camembert_baseline": {
                   "answer_pred": "outils en silex à côté d' os de mammifères",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.95
                 },
                 "llama-2_lora": {
                   "answer_pred": "outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' homme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des outils en silex",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.95
                 }
               },
               "human_annot": {
@@ -205205,48 +205352,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -205557,33 +205662,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "coup de poing",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.1
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ces outils étaient probablement utilisés pour tuer des animaux, mais c' est très incertain",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.45
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "couper, de piquer et de frapper",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.15
                 },
                 "Camembert_baseline": {
                   "answer_pred": "On frappe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.1
                 },
                 "llama-2_lora": {
                   "answer_pred": "par des animaux",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.05
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.2
                 },
                 "GPT-3.5": {
                   "answer_pred": "c'est très incertain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.45
                 }
               },
               "human_annot": {
@@ -205713,48 +205825,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur inacceptable"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -206032,33 +206102,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les groupes s' y installaient et y habitaient.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les groupes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -206086,12 +206163,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -206160,33 +206231,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans la même région",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au Tchad",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tchad",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Tchad",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Tchad",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans la même région, dans le but de découvrir d' autres fossiles",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans la même région (au Tchad)",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -206226,12 +206304,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -206285,33 +206357,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "Camembert_baseline": {
                   "answer_pred": "habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "llama-2_lora": {
                   "answer_pred": "une habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "GPT-3.5": {
                   "answer_pred": "une habitation",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 }
               },
               "human_annot": {
@@ -206399,48 +206478,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -206737,33 +206774,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les archéologues",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -206773,12 +206817,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -206877,33 +206915,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les hommes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -206937,12 +206982,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -207017,33 +207056,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Meruti",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -207077,12 +207123,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -207146,33 +207186,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "On trouve aussi des bisons, des aurochs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "On",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "On trouve aussi des bisons, des aurochs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "renne et le cheval. On trouve aussi des bisons, des aurochs. Le mammouth",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le mammouth disparait progressivement bien qu' il soit toujours représenté.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Les animaux les plus consommés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "On trouve aussi des bisons, des aurochs.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -207206,12 +207253,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -207274,33 +207315,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "village de Saint-Acheul",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "du village de Saint-Acheul",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "village de Saint-Acheul",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "village de Saint-Acheul, près d' Amiens",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Saint-Acheul, près d' Amiens (dans la Somme)",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "du village de Saint-Acheul, près d' Amiens (dans la Somme)",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le nom Acheuléen provient du village de Saint-Acheul, près d'Amiens (dans la Somme).",
-                  "rougeL": 0.4761904761904762
+                  "rougeL": 0.4761904761904762,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -207328,12 +207376,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -207396,33 +207438,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Saint-Acheul,",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Saint-Acheul",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -207456,12 +207505,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -207524,42 +207567,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "travaillant les hommes produisent leur alimentation végétale et abandonnent progressivement la cueillette",
-                  "rougeL": 0.9473684210526316
+                  "rougeL": 0.9473684210526316,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la culture nait lorsque les hommes prennent le contrôle de la croissance de certaines espèces végétales",
-                  "rougeL": 0.1904761904761905
+                  "rougeL": 0.1904761904761905,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "alimentation végétale et abandonnent progressivement la cueillette",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "La culture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la culture des plantes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La culture des plantes commence lorsque en travaillant les hommes produisent leur alimentation végétale et abandonnent progressivement la cueillette.",
-                  "rougeL": 0.8695652173913044
+                  "rougeL": 0.8695652173913044,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la production alimentaire végétale",
-                  "rougeL": 0.15384615384615383
+                  "rougeL": 0.15384615384615383,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -207652,42 +207696,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' abbé Henri Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abbé Henri Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -207771,33 +207816,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Henri Breuil",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abbé Henri Breuil",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'abbé Henri Breuil",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -207807,12 +207859,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -207896,33 +207942,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la sève ne peut plus alimenter la partie supérieure de l' arbre qui sèche sur place et est brûlé",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la sève",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un engrais naturel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sève",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sève",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la sève",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la sève",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -207944,12 +207997,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -208025,33 +208072,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Édouard Piette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -208085,12 +208139,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -208155,33 +208203,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "kangourous",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "c'est le cas des kangourous.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -208209,12 +208264,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -208284,33 +208333,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "abris-sous-roche",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.7
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sol d' abris-sous-roche,",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "abris-sous-roche",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.7
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.9
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.9
                 }
               },
               "human_annot": {
@@ -208398,48 +208454,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -208730,33 +208744,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "abris-sous-roche",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans le sol d' abris-sous-roche",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "abris-sous-roche,",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "abris-sous-roche",
-                  "rougeL": 0.5333333333333333
+                  "rougeL": 0.5333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le sol d' abris-sous-roche, c' est - à - dire des grottes ouvertes ou peu profondes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le sol d'abris-sous-roche",
-                  "rougeL": 0.4705882352941177
+                  "rougeL": 0.4705882352941177,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -208796,12 +208817,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -208863,33 +208878,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Christian Lapie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Christian Lapie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Christian Lapie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les architectes Maud Leforestier et Frédéric Blérot.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -208899,12 +208921,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -208982,33 +208998,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Maud Leforestier et Frédéric Blérot",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209042,12 +209065,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -209111,33 +209128,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la première balle traversa le bord de la voiture et atteignit la duchesse de Hohenberg à l' abdomen",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux fois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la première balle traversa le bord de la voiture et atteignit la duchesse de Hohenberg à l' abdomen",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la première balle traversa le bord de la voiture et atteignit la duchesse de Hohenberg à l' abdomen. La seconde balle",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "deux fois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux fois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Princip tire une deuxième balle.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -209147,12 +209171,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -209239,33 +209257,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Boroevi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Boroevi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209299,12 +209324,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -209361,33 +209380,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Boroevi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Boroevi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Boroević",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209421,12 +209447,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -209507,33 +209527,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Hitler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Hitler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Chef d' escadron de la flotte du Führer",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "faire croire aux soldats allemands et à la population civile qu' il arrivait immédiatement après la fin de la lutte.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Hitler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Hitler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Hitler",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209555,12 +209582,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -209635,33 +209656,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nuit du 24 septembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la nuit du 24 septembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209671,12 +209699,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -209758,33 +209780,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans la nuit du 24 septembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 24 septembre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans la nuit du 24 septembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans la nuit du 24 septembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209818,12 +209847,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -209899,33 +209922,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu'elle ne perd pas de territoire",
-                  "rougeL": 0.8799999999999999
+                  "rougeL": 0.8799999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "exprima sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sa volonté d'accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu'elle ne perdait pas de territoire.",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -209935,12 +209965,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -210033,33 +210057,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain",
-                  "rougeL": 0.7000000000000001
+                  "rougeL": 0.7000000000000001,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 0.9600000000000001
+                  "rougeL": 0.9600000000000001,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "exprima sa volonté d' accepter la protection italienne et même un prince italien comme souverain aussi longtemps qu' elle ne perdait pas de territoire",
-                  "rougeL": 0.9285714285714286
+                  "rougeL": 0.9285714285714286,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "accepter la protection italienne",
-                  "rougeL": 0.375
+                  "rougeL": 0.375,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -210105,12 +210136,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -210161,33 +210186,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kompartz",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le lieutenant Kompartz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -210203,12 +210235,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -210293,33 +210319,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "eau potable",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "eau potable pendant les repas et en ne fumant pas",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "eau potable",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' eau potable",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "300 £ supplémentaires par an pour maintenir le niveau de vie d' un officier de cavalerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "eau potable",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de l'eau potable",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -210353,12 +210386,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -210434,33 +210461,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des soldats allemands",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -210500,12 +210534,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -210568,33 +210596,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des colonnes de soldats allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les soldats allemands",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -210616,12 +210651,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -210696,33 +210725,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Première Guerre mondiale.",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Mitrofan Nejentsev participe à la Première Guerre mondiale.",
-                  "rougeL": 0.13333333333333333
+                  "rougeL": 0.13333333333333333,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -210762,12 +210798,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -210819,33 +210849,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Première Guerre mondiale et à la guerre civile russe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Première Guerre mondiale et à la guerre civile russe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Première Guerre mondiale et à la guerre civile russe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à la Première Guerre mondiale et à la guerre civile russe",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Première Guerre mondiale. Mitrofan Nejentsev. Mitrofan Osipovitch Nejentsev (russe : Митрофан Осипович Неженцев) est un colonel d' état-major russe né en 1886 et mort au combat le 12 avril 1918 près de Ekaterinodar. Il participa à la Première Guerre mondiale et à la guerre civile russe.",
-                  "rougeL": 0.09375
+                  "rougeL": 0.09375,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Première Guerre mondiale",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Mitrofan Osipovitch Nejentsev participe à la Première Guerre mondiale et à la guerre civile russe.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -210873,12 +210910,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -210978,33 +211009,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Paul Prosper Henrys",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paul Prosper Henrys",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Paul Prosper Henrys",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paul Prosper Henrys",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Lunéville.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "il",
-                  "rougeL": 0
+                  "rougeL": 0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Paul Prosper Henrys",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211050,12 +211088,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -211107,33 +211139,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Karl Bodenschatz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Karl Bodenschatz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Karl Bodenschatz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Karl Bodenschatz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Karl Bodenschatz, officier d' ordonnance de Richthofen au Jagdgeschwaders 1",
-                  "rougeL": 0.4827586206896552
+                  "rougeL": 0.4827586206896552,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Karl Bodenschatz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Karl Bodenschatz, officier d'ordonnance de Richthofen au Jagdgeschwaders 1",
-                  "rougeL": 0.4666666666666667
+                  "rougeL": 0.4666666666666667,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211173,12 +211212,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -211260,33 +211293,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Fabian Ware",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211320,12 +211360,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -211388,33 +211422,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Éamon de Valera du Sinn Fein",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Éamon de Valera",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Éamon de Valera du Sinn Fein",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Éamon de Valera",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Éamon de Valera",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Éamon de Valera",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Éamon de Valera du Sinn Fein.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211430,12 +211471,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -211517,33 +211552,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "harceler les tranchées voisines, pour couper les barbelés et détruire les bunkers adverses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "harceler les tranchées voisines, pour couper les barbelés et détruire les bunkers adverses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à la verticale",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "harceler les tranchées voisines, pour couper les barbelés et détruire les bunkers adverses.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "harceler les tranchées voisines, pour couper les barbelés et détruire les bunkers adverses.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "harceler les tranchées voisines, pour couper les barbelés et détruire les bunkers adverses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "harceler les tranchées voisines, couper les barbelés, détruire les bunkers adverses",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211571,12 +211613,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -211660,33 +211696,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "60e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "21e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "21e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "21e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "21e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "21e division d' infanterie",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la 21e division d'infanterie",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211714,12 +211757,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -211788,33 +211825,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -211848,12 +211892,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -211910,33 +211948,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Heurteaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -212150,48 +212195,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -212374,33 +212377,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une arme psychologique, destinée à terroriser les Parisiens, les désordres et les manifestations ainsi suscités poussant le gouvernement français à demander un armistice",
-                  "rougeL": 0.08
+                  "rougeL": 0.08,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "solide haubanage partant d' un mât central",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "obus de 210 mm chemisés entre 210 et 240 mm pour s' adapter à l' usure du tube provoquée par l' effet d' arrachement des munitions",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "canons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "mont de Joie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les canons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les canons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -212410,12 +212420,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -212508,33 +212512,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Woditschka",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Woditschka",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Woditschka",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Woditschka",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Chvéïk et Woditschka",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Chvéïk et Woditschka",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Chvéïk et Woditschka",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -212544,12 +212555,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -212690,33 +212695,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Villaret",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le lieutenant de Villaret",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -212726,12 +212738,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -212819,33 +212825,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "de nombreuses autres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "de nombreuses autres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trente-cinq personnes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "vénizélistes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de nombreuses autres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "nombreuses autres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de nombreuses autres",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -212879,12 +212892,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -212947,33 +212954,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Konrad est nommé à la 1re brigade d'artillerie de Munich en août 1895.",
-                  "rougeL": 0.35294117647058826
+                  "rougeL": 0.35294117647058826,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -213007,12 +213021,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -213070,33 +213078,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1895",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1895",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1895",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en août 1895",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -213124,12 +213139,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -213199,42 +213208,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "port de Vladivostok en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vladivostok",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "port de Vladivostok",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vladivostok",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vladivostok",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vladivostok en Sibérie",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "port de Vladivostok",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -213321,33 +213331,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "port de Vladivostok en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "port de Vladivostok en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "port de Vladivostok en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vladivostok en Sibérie",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vladivostok en Sibérie",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vladivostok en Sibérie",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le port de Vladivostok en Sibérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -213375,12 +213392,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -213449,33 +213460,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -213515,12 +213533,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -213571,33 +213583,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Normandie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -213625,12 +213644,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -213699,33 +213712,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "llama-2_lora": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 },
                 "GPT-3.5": {
                   "answer_pred": "Deverell",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.9
                 }
               },
               "human_annot": {
@@ -213939,48 +213959,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -214152,33 +214130,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Schlieffen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -214212,12 +214197,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -214280,33 +214259,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en tant qu' organe principal des activistes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en tant qu' organe principal des activistes,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comme organe principal des activistes",
-                  "rougeL": 0.75
+                  "rougeL": 0.75,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "agissant en tant qu' organe principal des activistes",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Conseil de Flandre agit en tant qu'organe principal des activistes",
-                  "rougeL": 0.46153846153846156
+                  "rougeL": 0.46153846153846156,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -214346,12 +214332,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -214402,33 +214382,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en tant qu' organe principal des activistes,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "organe principal des activistes",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -214462,12 +214449,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -214542,42 +214523,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sophie",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sophie",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sophie",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "l'empereur François-Joseph",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' empereur François-Joseph",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L' empereur François-Joseph lui demande d' y réfléchir.",
-                  "rougeL": 0.8750000000000001
+                  "rougeL": 0.8750000000000001,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -214676,42 +214658,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "François-Joseph",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "empereur François-Joseph",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'empereur François-Joseph",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -214804,33 +214787,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un document",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Cet article contient tout ou partie d' un document provenant du site La Vie rémoise.",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Aucune séquence de mots ne répond à la question posée car l'article ne mentionne pas ce qui provient du site La Vie rémoise.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -214876,12 +214866,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -214926,33 +214910,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5555555555555556
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tout ou partie d' un document provenant du site La Vie rémoise",
-                  "rougeL": 0.19999999999999998
+                  "rougeL": 0.19999999999999998,
+                  "HScore": 0.7222222222222222
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un document provenant du site La Vie rémoise",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.6111111111111112
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tout ou partie d' un document",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5555555555555556
                 },
                 "llama-2_lora": {
                   "answer_pred": "un document provenant du site La Vie rémoise",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.6111111111111112
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "document provenant du site La Vie rémoise",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.6111111111111112
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas dans l'article mentionné de séquence de mots qui répond à la question donnée.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.2222222222222222
                 }
               },
               "human_annot": {
@@ -215146,44 +215137,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -215297,12 +215250,6 @@
                     "annot": "annot_10",
                     "rating": "Erreur inacceptable"
                   }
-                ],
-                "question": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Question douteuse"
-                  }
                 ]
               },
               "lu_in_question": false,
@@ -215348,33 +215295,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des voisins ou amis musulmans",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -215414,12 +215368,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -215476,33 +215424,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ils n' ont plus rien à manger mais réussissent toutefois à survivre en pêchant du poisson, en attrapant des oiseaux et en buvant de l' eau de pluie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Rickenbacker et son équipage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Rickenbacker et son équipage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Rickenbacker et son équipage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Rickenbacker et son équipage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Rickenbacker et son équipage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Rickenbacker et son équipage.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -215530,12 +215485,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -215608,33 +215557,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ouest",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "course à la mer",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le camp allemand",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le commandement allemand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -215650,12 +215606,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -215730,33 +215680,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le commandement allemand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Chaque camp",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -215796,12 +215753,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -215858,33 +215809,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "près de Vlorë en Albanie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "près de Vlor en Albanie",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "près de Vlor en Albanie",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "près de Vlorë en Albanie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "près de Vlore en Albanie",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "près de Vlorë en Albanie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Vlorë en Albanie",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -215900,12 +215858,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -215982,33 +215934,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vlorë",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vlor",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vlor",
-                  "rougeL": 0.4444444444444445
+                  "rougeL": 0.4444444444444445,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vlorë",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vlorë",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vlorë",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "près de Vlorë en Albanie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -216048,12 +216007,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -216112,33 +216065,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Au soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Au soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "au soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Au soir du 21 avril",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au soir du 21 avril.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -216178,12 +216138,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -216243,42 +216197,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le 2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le 2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le 2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le 2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 2nd bataillon du major Jackson.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -216365,33 +216320,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "2nd bataillon",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2nd bataillon",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2nd bataillon",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "2nd bataillon du major Jackson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 2nd bataillon",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -216407,12 +216369,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -216493,33 +216449,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le curé, l' abbé Gaillard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gaillard",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le curé, l' abbé Gaillard",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'abbé Gaillard",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -216547,12 +216510,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -216615,33 +216572,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' abbé Gaillard",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'abbé Gaillard",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -216675,12 +216639,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -216743,33 +216701,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "flotte",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "flotte",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "flotte britannique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "flotte britannique",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "flotte aux origines modestes",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la flotte",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la flotte aux origines modestes",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -216785,12 +216750,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -216872,33 +216831,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -216938,12 +216904,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -216994,33 +216954,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Zurich",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217048,12 +217015,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -217122,33 +217083,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Allemagne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Allemagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Allemagne",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217164,12 +217132,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -217251,33 +217213,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1922",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "datant de 1922",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217305,12 +217274,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -217379,33 +217342,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "techniques de guérilla des Boers",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "techniques de guérilla des Boers",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "techniques de guérilla des Boers",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "techniques de guérilla des Boers",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pertes devant les techniques de guérilla des Boers",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "devant les techniques de guérilla des Boers",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les techniques de guérilla des Boers",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217433,12 +217403,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -217501,33 +217465,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des Boers",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217543,12 +217514,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -217629,33 +217594,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "turques et austro - hongroises",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "turques et austro-hongroises",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217683,12 +217655,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -217757,33 +217723,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217817,12 +217790,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -217885,33 +217852,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sancerre",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -217951,12 +217925,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -218007,33 +217975,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Sancerre sur la Loire",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218067,12 +218042,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -218135,33 +218104,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une certaine sympathie pour le parti nazi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une certaine sympathie pour le parti nazi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sympathie pour le parti nazi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une certaine sympathie pour le parti nazi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sympathie pour le parti nazi",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une certaine sympathie pour le parti nazi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le comte Czernin exprima une certaine sympathie pour le parti nazi.",
-                  "rougeL": 0.625
+                  "rougeL": 0.625,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218195,12 +218171,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -218257,33 +218227,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "parti nazi, bien que son fils, Manfred, qui était resté avec sa mère en Angleterre fut un pilote de la RAF pendant la Seconde Guerre mondiale fut un pilote de la RAF pendant la Seconde Guerre mondial",
-                  "rougeL": 0.20689655172413793
+                  "rougeL": 0.20689655172413793,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "nazi",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nazi",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le parti nazi",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le parti nazi",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le parti nazi",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le parti nazi",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218299,12 +218276,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -218385,33 +218356,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "tous ses buts de guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "tous ses buts de guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "tous ses buts de guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "tous ses buts de guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses buts de guerre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "tous ses buts de guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La Bulgarie a maintenant réalisé tous ses buts de guerre.",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218445,12 +218423,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -218525,33 +218497,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "États-Unis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les États-Unis.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218591,12 +218570,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -218655,33 +218628,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "il tente par deux fois d' entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "entrer à l' académie royale militaire",
-                  "rougeL": 0.5882352941176471
+                  "rougeL": 0.5882352941176471,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'arrive pas à entrer à l'académie royale militaire.",
-                  "rougeL": 0.4210526315789474
+                  "rougeL": 0.4210526315789474,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -218727,12 +218707,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -218777,33 +218751,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois fois à Sandhurst",
-                  "rougeL": 0.7368421052631579
+                  "rougeL": 0.7368421052631579,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "deux fois d' entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "deux fois d' entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "deux fois d' entrer à l' académie royale militaire et trois fois à Sandhurst",
-                  "rougeL": 0.923076923076923
+                  "rougeL": 0.923076923076923,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trois fois à Sandhurst",
-                  "rougeL": 0.7368421052631579
+                  "rougeL": 0.7368421052631579,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "entre à l' académie royale militaire et trois fois à Sandhurst sans succès",
-                  "rougeL": 0.8461538461538461
+                  "rougeL": 0.8461538461538461,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "tente par deux fois d' entrer à l' académie royale militaire et trois fois à Sandhurst sans succès",
-                  "rougeL": 0.8275862068965517
+                  "rougeL": 0.8275862068965517,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218849,12 +218830,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -218917,33 +218892,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "général Dubail",
-                  "rougeL": 0.22222222222222224
+                  "rougeL": 0.22222222222222224,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le IIe Armée française",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -218977,12 +218959,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -219052,33 +219028,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "de Castelnau",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le général de Castelnau",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -219094,12 +219077,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -219181,33 +219158,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "400 kg",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -219241,12 +219225,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -219309,33 +219287,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une explosion anormale au centre de la galerie",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une bombe ait explosé au bas d' un escalier d' accès sinon dans la galerie elle-même",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "explosion anormale au centre de la galerie",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "explosion anormale",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "explosion anormale au centre de la galerie",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une explosion anormale",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la destruction immédiate de la structure de soutènement et l'effondrement de la couverture de terres.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -219357,12 +219342,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -219432,33 +219411,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "explosion anormale au centre de la galerie",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "entraînant comme au Mont Cornillet des tués et des blessés en nombre, et de plus à Carspach la destruction immédiate de la structure de soutènement et l' effondre",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "explosion anormale au centre de la galerie",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "explosion anormale",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "entraînant comme au Mont Cornillet des tués et des blessés en nombre, et de plus à Carspach la destruction immédiate de la structure de soutènement et l' effondrement de la couverture de terres.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "entraînant comme au Mont Cornillet des tués et des blessés en nombre, et de plus à Carspach la destruction immédiate de la structure de soutènement et l' effondrement de la couverture de terres",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la destruction immédiate de la structure de soutènement et l' effondrement de la couverture de terres.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -219480,12 +219466,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -219573,33 +219553,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Union de la gauche socialiste, qui rassemble des chrétiens de gauche et d' anciens communistes, mais quitte ce mouvement lorsque celui-ci participe à la fondation du Parti socialiste unifié",
-                  "rougeL": 0.2727272727272727
+                  "rougeL": 0.2727272727272727,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Union de la gauche socialiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Union de la gauche socialiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Union de la gauche socialiste,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Union de la gauche socialiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Union de la gauche socialiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Union de la gauche socialiste",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -219633,12 +219620,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -219701,33 +219682,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pour célébrer l' occupation australienne de la Nouvelle - Bretagne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "celebrer l'occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour célébrer l'occupation australienne de la Nouvelle-Bretagne",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -219767,12 +219755,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -219823,33 +219805,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "célébrer l'occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "célébrer l' occupation australienne de la Nouvelle - Bretagne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour célébrer l'occupation australienne de la Nouvelle-Bretagne.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -219883,12 +219872,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -219975,33 +219958,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "château d' Itter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "château d' Itter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "La Rocque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "colonel de La Rocque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Château d' Itter",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "La Rocque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "La Rocque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -220047,12 +220037,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -220105,33 +220089,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "von Kluck",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -220165,12 +220156,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -220252,33 +220237,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "leurs dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Alliés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les dirigeants alliés",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les dirigeants, décidés",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -220312,12 +220304,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -220394,33 +220380,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "leurs dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les Alliés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les dirigeants alliés",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "leurs dirigeants",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -220436,12 +220429,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -220524,33 +220511,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "256",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "256",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "256",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "256",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "256 personnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "256 personnes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mort de 256 personnes.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -220560,12 +220554,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -220652,33 +220640,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "675000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "675000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "675000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "675000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "675000 soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "environ 675000",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "environ 675000 soldats",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -220724,12 +220719,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -220774,33 +220763,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les Allemands comptent environ 675000 soldats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les Allemands comptent environ 675000 soldats",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les Allemands comptent environ 675000 soldats tués, blessés ou disparus au combat",
-                  "rougeL": 0.42857142857142855
+                  "rougeL": 0.42857142857142855,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "675000",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "675000 soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Français, Belges, Britanniques, Allemands",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Français, les Belges et les Britanniques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -220828,12 +220824,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -220932,33 +220922,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "IIIe corps d' armée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Walther von Lüttwitz",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -220992,12 +220989,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -221060,33 +221051,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à une autre guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une autre guerre, fut -elle, pour les navires, ponctuelle",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à une autre guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à une autre guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "loin de la France, à une autre guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à une autre guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à une autre guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221120,12 +221118,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -221182,33 +221174,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une autre guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "guerre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "victoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une autre guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une autre guerre",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la victoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la victoire",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221224,12 +221223,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -221310,33 +221303,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "après la bataille de Mons",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221370,12 +221370,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -221432,33 +221426,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bataille de Mons",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Après la bataille de Mons.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221492,12 +221493,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -221578,33 +221573,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Wilson est la personne qui apprend qu'avec un mandat parlementaire, il pourra plus facilement devenir administrateur des sociétés.",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -221632,12 +221634,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -221706,33 +221702,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une fosse commune contenant les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221772,12 +221775,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -221834,33 +221831,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Rochefort, Brest, Cherbourg et Toulon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -221894,12 +221898,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -221956,33 +221954,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Paris",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le départ de la brigade de 6000 hommes organisée en deux régiments de fusiliers marins pour aller en renfort de l'armée belge s'est fait à partir de Paris.",
-                  "rougeL": 0.0909090909090909
+                  "rougeL": 0.0909090909090909,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222010,12 +222015,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -222084,33 +222083,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "À la recherche des traces du passé",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222150,12 +222156,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -222206,33 +222206,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "passé du passé",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "traces du passé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222266,12 +222273,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -222334,33 +222335,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "existence d' une lettre du premier ministre français Clemenceau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "l' intransigeance allemande anéantit tout espoir en s' exprimant dans un discours public, l' existence d' une lettre du premier ministre français Clemenceau",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' intransigeance allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' existence d' une lettre du premier ministre français Clemenceau.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "intransigeance allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' existence d' une lettre du premier ministre français Clemenceau",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'intransigeance allemande",
-                  "rougeL": 0.8000000000000002
+                  "rougeL": 0.8000000000000002,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222370,12 +222378,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -222464,33 +222466,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les premières sorties d' usine",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les premières sorties d'usine.",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222530,12 +222539,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -222592,33 +222595,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222646,12 +222656,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -222714,33 +222718,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "trois Kaiserschützen",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les corps de trois Kaiserschützen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222780,12 +222791,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -222842,33 +222847,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ami de von Rauffenstein",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un ami",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -222908,12 +222920,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -222970,33 +222976,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "défendre les droits des soldats d' Algérie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour défendre les droits des soldats d'Algérie",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223036,12 +223049,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -223092,33 +223099,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "défendre les droits des soldats d' Algérie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pour défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour défendre les droits des soldats d' Algérie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223152,12 +223166,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -223220,33 +223228,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef socialiste",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jaurès",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223280,12 +223295,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -223349,33 +223358,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses comme les éléments à détruire pour relancer la guerre de mouvement",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses comme les éléments à détruire pour relancer la guerre de mouvement",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses comme les éléments à détruire pour relancer la guerre de mouvement",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223409,12 +223425,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -223471,42 +223481,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les barbelés et les mitrailleuses",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -223599,33 +223610,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223659,12 +223677,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -223721,33 +223733,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223781,12 +223800,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -223843,33 +223856,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "chef de la mission militaire française en Pologne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -223903,12 +223923,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -223971,33 +223985,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la première balle traversa le bord de la voiture et atteignit la duchesse de Hohenberg à l' abdomen",
-                  "rougeL": 0.2222222222222222
+                  "rougeL": 0.2222222222222222,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la première balle traversa le bord de la voiture et atteignit la duchesse de Hohenberg à l' abdomen. La seconde balle atteignit l' archiduc dans le cou",
-                  "rougeL": 0.14814814814814814
+                  "rougeL": 0.14814814814814814,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la duchesse de Hohenberg à l' abdomen. La seconde balle atteignit l' archiduc dans le cou",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "voiture",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sandwich",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "deux fois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Princip a tiré avec une balle.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -224013,12 +224034,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -224099,33 +224114,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Gough",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -224165,12 +224187,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -224228,33 +224244,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Allemagne de l'ouest",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -224270,12 +224293,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -224352,33 +224369,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemagne",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemagne de l' ouest",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En Allemagne",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -224418,12 +224442,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -224482,33 +224500,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Français",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "partie de Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "partie de Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Français",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "de Français",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -224524,12 +224549,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -224604,33 +224623,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Français",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "partie de Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "partie de Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Français qui ont rejoint les révolutionnaires",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la propagande bolchevique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -224670,12 +224696,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -224732,33 +224752,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le 26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 26 août 1914.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -224768,12 +224795,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -224854,33 +224875,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le 26 août 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 26 août 1914.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -224920,12 +224948,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -224982,33 +225004,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Au début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "10 sont des bataillons de la Garde",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Au début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225024,12 +225053,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -225105,33 +225128,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1920",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au début de 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225159,12 +225189,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -225234,33 +225258,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225300,12 +225331,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -225356,33 +225381,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les mineurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les mineurs allemands.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225422,12 +225454,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -225484,33 +225510,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un emploi dans une usine de mécanique automobile",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un emploi",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225520,12 +225553,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -225612,42 +225639,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fosse commune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fosse commune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à l' entrée sud du tunnel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' entrée sud du tunnel,",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sud du tunnel",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une fosse commune à l' entrée sud du tunnel",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une fosse commune",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -225740,33 +225768,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "révolution russe de 1905",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225806,12 +225841,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -225864,15 +225893,15 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1905",
                   "rougeL": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "révolution russe de 1905",
                   "rougeL": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "révolution russe de 1905",
                   "rougeL": 1.0
                 },
@@ -225938,33 +225967,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le service des armes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -225998,12 +226034,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -226066,33 +226096,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "4 novembre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 4 novembre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -226138,12 +226175,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -226190,33 +226221,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "4 novembre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 4 novembre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le 1er novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 1er novembre.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -226250,12 +226288,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -226320,33 +226352,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "survivants de la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "survivants de la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "survivants de la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat et affectés par des séquelles physiques graves, notamment au niveau du visage.",
-                  "rougeL": 0.4210526315789474
+                  "rougeL": 0.4210526315789474,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat et affectés par des séquelles physiques graves",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -226356,12 +226395,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -226444,42 +226477,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "gueules cassées",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "survivants de la Première Guerre mondiale",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "au niveau du visage",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "survivants de la Première Guerre mondiale ayant subi une ou plusieurs blessures au combat et affectés par des séquelles physiques graves, notamment au niveau du visage",
-                  "rougeL": 0.4210526315789474
+                  "rougeL": 0.4210526315789474,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les survivants de la Première Guerre mondiale.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -226574,33 +226608,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un modèle « French gendarme » sans qu' aucun élément ne vienne valider la réalité de cette variante",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -226622,12 +226663,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -226696,33 +226731,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le modèle \"French gendarme\"",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -226750,12 +226792,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -226824,33 +226860,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -226878,12 +226921,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -226946,33 +226983,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pilotage sur Morane - Saulnier type L",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jean Navarre a appris le pilotage.",
-                  "rougeL": 0.14285714285714285
+                  "rougeL": 0.14285714285714285,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -227012,12 +227056,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -227074,33 +227112,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "coup de canon",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le premier coup de canon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "résidence du poète",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "premier coup de canon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un premier coup de canon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le premier coup de canon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le premier coup de canon",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -227116,12 +227161,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -227202,42 +227241,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au centre de la galerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "galerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "au centre de la galerie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au bas d' un escalier d' accès sinon dans la galerie elle-même",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'un dépôt de grenades ou de munitions",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -227324,33 +227364,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "grenades ou de munitions",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "grenades ou de munitions",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de munitions",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "d' un dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dépôt de grenades ou de munitions",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -227366,12 +227413,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -227452,33 +227493,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "officiers",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les soldats flamands sortis du rang",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les soldats flamands",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les soldats flamands",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -227512,12 +227560,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -227580,33 +227622,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les zones d' attaque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "zones d' attaque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les zones d' attaque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les zones d' attaque.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les zones d'attaque",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les zones d' attaque",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les zones d'attaque",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -227634,12 +227683,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -227702,33 +227745,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "élargir les zones d' attaque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "élargir les zones d' attaque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ils longent le canal à l' ouest du saillant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' élargir les zones d' attaque.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "élargir les zones d' attaque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "élargir les zones d' attaque",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "élargir les zones d'attaque",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -227768,12 +227818,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -227830,33 +227874,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bundestag",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bundestag",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Tribunal permanent des peuples, d' une sous-commission de l' ONU pour la prévention des droits de l' homme et la protection des minorités, du parlement européen, du Conseil de l' Europe et du Mercosur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bundestag",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bundestag",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Bundestag",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -227896,12 +227947,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -227949,33 +227994,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 14 février 1916.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228003,12 +228055,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -228072,33 +228118,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 14 février 1916",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228126,12 +228179,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -228201,33 +228248,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la mise en route de petits travaux d' intérêt local",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la mise en route de petits travaux d' intérêt local",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mise en route de petits travaux d' intérêt local",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la mise en route de petits travaux d' intérêt local",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "petits travaux d' intérêt local",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la mise en route de petits travaux d' intérêt local",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la mise en route de petits travaux d'intérêt local",
-                  "rougeL": 0.7692307692307692
+                  "rougeL": 0.7692307692307692,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228261,12 +228315,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -228329,33 +228377,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "miraculés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des enfants",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228395,12 +228450,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -228451,33 +228500,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "miraculés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "miraculés",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des femmes et des jeunes filles enlevées, disparues dans le secret des maisons turques ou rééduquées dans les écoles islamiques",
-                  "rougeL": 0.08333333333333333
+                  "rougeL": 0.08333333333333333,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "quelques miraculés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228499,12 +228555,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -228579,33 +228629,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cadres de l' armée et de l' administration,",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une partie des cadres de l' armée et de l' administration",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une partie des cadres de l' armée et de l' administration",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une partie des cadres de l'armée et de l'administration",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228621,12 +228678,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -228703,33 +228754,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une partie des cadres de l' armée et de l' administration,",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une partie des cadres de l' armée et de l' administration",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cadres de l' armée et de l' administration",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "une partie des cadres de l' armée et de l' administration",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228751,12 +228809,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -228833,33 +228885,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -228893,12 +228952,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -228957,33 +229010,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Allemands de Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -229023,12 +229083,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -229087,33 +229141,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Balkans, la Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 0.8235294117647058
+                  "rougeL": 0.8235294117647058,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grèce, Italie et les puissances de l'Entente",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Grèce, l'Italie et les puissances de l'Entente",
-                  "rougeL": 0.39999999999999997
+                  "rougeL": 0.39999999999999997,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -229129,12 +229190,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -229209,33 +229264,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Constantin Ier",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la Grèce, l' Italie et les puissances de l' Entente",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Grèce, Italie et puissances de l'Entente",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Grèce, l'Italie et les puissances de l'Entente",
-                  "rougeL": 0.39999999999999997
+                  "rougeL": 0.39999999999999997,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l' Italie et les puissances de l' Entente",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -229263,12 +229325,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -229337,33 +229393,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chemin de fer",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -229397,12 +229460,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -229461,42 +229518,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "chemin de fer",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "chemin de fer",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mourmansk",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une ligne de chemin de fer vers ce qui allait devenir la ville de Mourmansk",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une ligne de chemin de fer",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -229591,33 +229649,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le plus gros obus qu' un fort pouvait tirer",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le plus gros obus qu' un fort pouvait tirer",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le plus gros obus qu' un fort pouvait tirer",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le plus gros obus",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.8
                 },
                 "llama-2_lora": {
                   "answer_pred": "un obus de Grosse Bertha",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.45
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le plus gros obus qu' un fort pouvait tirer",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un obus d'un fort",
-                  "rougeL": 0.28571428571428575
+                  "rougeL": 0.28571428571428575,
+                  "HScore": 0.6
                 }
               },
               "human_annot": {
@@ -229873,48 +229938,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -230080,33 +230103,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230116,12 +230146,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -230239,33 +230263,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Mudra",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Bruno von Mudra",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230275,12 +230306,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -230380,33 +230405,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les observateurs allemands dans la vallée",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les observateurs allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -230416,12 +230448,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -230508,33 +230534,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au lendemain de la Révolution d' Octobre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au lendemain de la Révolution d' Octobre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au lendemain de la Révolution d' Octobre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "lendemain de la Révolution d' Octobre.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "au lendemain de la Révolution d' Octobre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "au lendemain de la Révolution d' Octobre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Botchkareva est arrêtée au lendemain de la Révolution d' Octobre.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230568,12 +230601,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -230630,33 +230657,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Révolution d' Octobre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Révolution d' Octobre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Révolution d' Octobre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Révolution d' Octobre.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "révolution d'Octobre",
-                  "rougeL": 0.3333333333333333
+                  "rougeL": 0.3333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Révolution d' Octobre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Révolution d' Octobre",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230684,12 +230718,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -230758,33 +230786,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "FEA 11",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "FEA 11",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "FEA 11",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "FEA 11",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "FEA 11",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Josef Carl Peter Jacobs",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "il est envoyé au FEA 11 à Laon",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230830,12 +230865,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -230886,33 +230915,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "France. Jean Baptiste Eugène Estienne. Jean Baptiste Eugène Estienne (7 novembre 1860 à Condé -en - Barrois, France - 2 avril 1936 à Paris) est un artilleur et ingénieur militaire français.",
-                  "rougeL": 0.05882352941176471
+                  "rougeL": 0.05882352941176471,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "France",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -230946,12 +230982,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -231015,33 +231045,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "4 h 10",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "4 h 10",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "4 h 10",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pendant les nuits précédentes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "durant les nuits précédentes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pendant les nuits précédentes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nuits précédentes.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -231057,12 +231094,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -231137,33 +231168,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pendant les nuits précédentes",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les nuits précédentes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les nuits précédentes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les nuits précédentes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "durant les nuits précédentes",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les nuits précédentes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nuits précédentes",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231191,12 +231229,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -231265,33 +231297,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en bas du versant méridional de la butte",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en bas du versant méridional de la butte.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231319,12 +231358,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -231388,33 +231421,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "méridional de la butte",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "versant méridional de la butte",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "versant méridional de la butte",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "méridional",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "versant méridional de la butte",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "versant méridional de la butte",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en bas du versant méridional",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231436,12 +231476,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -231517,33 +231551,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans le bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans le bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans le bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Mexico",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231553,12 +231594,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -231639,33 +231674,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le bureau public de télégraphe",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans le bureau public de télégraphe à Mexico",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231705,12 +231747,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -231767,33 +231803,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sans consultations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sans consultations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sans consultations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sans consultations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "il agit sans consultations",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "agit sans consultations",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Wilson agit sans consultations.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -231839,12 +231882,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -231895,42 +231932,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "questions éthiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "questions éthiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "questions éthiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "questions éthiques",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une paix mondiale moins militarisée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une paix mondiale moins militarisée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour une paix mondiale moins militarisée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -232028,33 +232066,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "son avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "son avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "son avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "son avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ses avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "son avis",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'avis de Wilson",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232094,12 +232139,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -232156,33 +232195,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sur la ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur la ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur la ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "ZAC Actiparc près d' Arras.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur la ZAC Actiparc près d'Arras",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232192,12 +232238,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -232280,33 +232320,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Actiparc",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ZAC Actiparc",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ZAC Actiparc",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Actiparc",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Actiparc",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ZAC Actiparc près d' Arras",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "ZAC Actiparc près d'Arras.",
-                  "rougeL": 0.8888888888888888
+                  "rougeL": 0.8888888888888888,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232352,12 +232399,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -232410,33 +232451,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "élevées",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "très élevées",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232470,12 +232518,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -232539,33 +232581,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le haut commandement de l'armée",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232605,12 +232654,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -232661,33 +232704,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "haut commandement de l' armée",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "au haut commandement de l'armée",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -232715,12 +232765,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -232789,33 +232833,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "écarter de Gaulle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "écarter de Gaulle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar, et incertains de l' accueil que leur réserveraient les généraux vichystes, cherchent un général français prestigieux pour prendre le commandement de l' armée d' Afrique",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Alliés envisagent un débarquement en Afrique du Nord",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -232837,12 +232888,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -232911,42 +232956,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "écarter de Gaulle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "écarter de Gaulle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "écarter de Gaulle après son échec de Dakar",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "cherchent un général français prestigieux pour prendre le commandement de l' armée d' Afrique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la décision prise par les Alliés dirigeants est de chercher un général français prestigieux pour prendre le commandement de l'armée d'Afrique.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -233051,42 +233097,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "impératrice russe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "impératrice russe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "impératrice russe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' impératrice russe",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "caisse de l' impératrice russe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "caisse de l' impératrice russe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la caisse de l'impératrice russe",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -233197,33 +233244,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pour savoir comment leurs différends ont pu être divulgués dans la presse et fait valoir son droit d' exprimer ses opinions sur la stratégie",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pour savoir comment leurs différends ont pu être divulgués dans la presse et fait valoir son droit d' exprimer ses opinions sur la stratégie",
-                  "rougeL": 0.23529411764705882
+                  "rougeL": 0.23529411764705882,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "leurs différends ont pu être divulgués dans la presse",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "stratégie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "de la stratégie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "stratégie",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Lloyd George écrit à Robertson pour savoir comment leurs différends ont pu être divulgués dans la presse et fait valoir son droit d'exprimer ses opinions sur la stratégie.",
-                  "rougeL": 0.14814814814814814
+                  "rougeL": 0.14814814814814814,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -233239,12 +233293,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -233325,33 +233373,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sous domination allemande depuis 1871",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "occupée",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sous domination allemande depuis 1871",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Thann, sous domination allemande depuis 1871, est occupée et devient, jusqu' à la fin de la guerre,",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "sous domination allemande depuis 1871",
-                  "rougeL": 0.9090909090909091
+                  "rougeL": 0.9090909090909091,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Thann, sous domination allemande depuis 1871, est occupée et devient, jusqu' à la fin de la guerre, la capitale d' une portion de territoire alsacien redevenue française.",
-                  "rougeL": 0.4799999999999999
+                  "rougeL": 0.4799999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Thann, sous domination allemande depuis 1871, est occupée et devient, jusqu' à la fin de la guerre, la capitale d' une portion de territoire alsacien redevenue française.",
-                  "rougeL": 0.4799999999999999
+                  "rougeL": 0.4799999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -233367,12 +233422,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -233453,33 +233502,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "les corps de 16 hommes exécutés par la Gestapo",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.85
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les corps de 16 hommes",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.85
                 },
                 "GPT-3.5": {
                   "answer_pred": "les corps de 16 hommes exécutés par la Gestapo.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -233777,48 +233833,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -233899,33 +233913,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "16 hommes",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "16 hommes exécutés par la Gestapo",
-                  "rougeL": 0.9411764705882353
+                  "rougeL": 0.9411764705882353,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "16 hommes",
-                  "rougeL": 0.3636363636363636
+                  "rougeL": 0.3636363636363636,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les corps de 16 hommes",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les corps de 16 hommes exécutés par la Gestapo.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -233941,12 +233962,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -234027,33 +234042,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "400 kg",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "plus de 400 kg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234081,12 +234103,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -234155,33 +234171,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "d' utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' utiliser ces conseils comme éléments de base de discussions sur la stratégie future.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "envoi d' aide à l' Italie et la préparation d' offensives en Palestine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Lloyd George a décidé d'utiliser ces conseils comme éléments de base de discussions sur la stratégie future.",
-                  "rougeL": 0.5833333333333334
+                  "rougeL": 0.5833333333333334,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234227,12 +234250,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -234277,33 +234294,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "d' utiliser ces conseils comme éléments de base de discussions sur la stratégie future.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "envoi d' aide à l' Italie et la préparation d' offensives en Palestine",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "utiliser ces conseils comme éléments de base de discussions sur la stratégie future",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Lloyd George décide d'utiliser ces conseils comme éléments de base de discussions sur la stratégie future.",
-                  "rougeL": 0.608695652173913
+                  "rougeL": 0.608695652173913,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234325,12 +234349,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -234405,33 +234423,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234471,12 +234496,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -234527,33 +234546,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Bordeaux",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234587,12 +234613,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -234655,33 +234675,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le matin.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -234715,12 +234742,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -234778,33 +234799,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "18 mars 1918",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le matin, l' artillerie allemande bombarde les lignes françaises à l' aide d' obus à gaz",
-                  "rougeL": 0.18181818181818182
+                  "rougeL": 0.18181818181818182,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "le matin",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -234820,12 +234848,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -234907,42 +234929,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "bande dessinée",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "production assez importante",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "une production assez importante",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "une production assez importante",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "une production assez importante de bande dessinée",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une production assez importante existe depuis au moins les années 2000",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "une production assez importante",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -235047,42 +235070,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' archiduc François - Ferdinand, héritier de l' Empire austro-hongrois, et son épouse la duchesse de Hohenberg,",
-                  "rougeL": 0.4166666666666667
+                  "rougeL": 0.4166666666666667,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'épouse la duchesse de Hohenberg",
-                  "rougeL": 0.7272727272727272
+                  "rougeL": 0.7272727272727272,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -235183,33 +235207,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "archiduc François - Ferdinand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "François - Ferdinand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "archiduc François - Ferdinand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "François - Ferdinand",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "François-Ferdinand, héritier de l'Empire austro-hongrois",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la duchesse de Hohenberg",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "son épouse la duchesse de Hohenberg",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -235219,12 +235250,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -235313,33 +235338,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "début de juin 1920",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En février 1920.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -235385,12 +235417,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -235437,33 +235463,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "février 1920",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -235503,12 +235536,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -235567,33 +235594,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -235639,12 +235673,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -235689,33 +235717,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "inventer des masques à gaz et systèmes de protection",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "inventer des masques à gaz et systèmes de protection",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "inventer des masques à gaz et systèmes de protection",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "masques à gaz et systèmes de protection",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "inventer des masques à gaz et systèmes de protection",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -235731,12 +235766,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -235817,42 +235846,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "sa carrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "carrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sa carrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "sa carrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "gendarme à Gaillon",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "débute sa carrière",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Jules Arsène Crosnier a commencé sa carrière en tant que gendarme à Gaillon puis à Louviers.",
-                  "rougeL": 0.09523809523809523
+                  "rougeL": 0.09523809523809523,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -235945,33 +235975,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "90 % des pertes de la 25e division d' infanterie britannique",
-                  "rougeL": 0.875
+                  "rougeL": 0.875,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pertes importantes à l' infanterie britannique",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pertes importantes à l' infanterie britannique",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pertes importantes à l' infanterie britannique.",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "90 % des pertes de la 25e division d' infanterie britanniques proviennent de l' artillerie allemande.",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "des pertes importantes à l' infanterie britannique",
-                  "rougeL": 0.4615384615384615
+                  "rougeL": 0.4615384615384615,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "des pertes importantes à l'infanterie britannique",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -236017,12 +236054,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -236073,42 +236104,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Le combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Le combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Le combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Bethmann Hollweg",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le combat contre la Russie.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -236197,33 +236229,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "les sympathisants des Habsbourg et au sein de la branche des financiers",
-                  "rougeL": 0.15384615384615385
+                  "rougeL": 0.15384615384615385,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "combat contre la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russie",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Allemagne du sud",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Russie",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les financiers des Habsbourg apportent leur aide à l'Allemagne du sud.",
-                  "rougeL": 0.11764705882352941
+                  "rougeL": 0.11764705882352941,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -236233,12 +236272,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -236327,33 +236360,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "à déposer au procès Pucheu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "explique ses engagements envers l' accusé, ainsi que leur violation lors de la mise en résidence surveillée de ce dernier",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Giraud explique ses engagements envers l' accusé, ainsi que leur violation lors de la mise en résidence surveillée de ce dernier",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "appelé par la défense à déposer au procès Pucheu",
-                  "rougeL": 0.8333333333333333
+                  "rougeL": 0.8333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pour déposer au procès Pucheu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "explique ses engagements envers l' accusé, ainsi que leur violation lors de la mise en résidence surveillée de ce dernier",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour déposer au procès Pucheu",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -236399,12 +236439,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -236455,33 +236489,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "des déplacements de populations",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -236503,12 +236544,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -236583,33 +236618,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "En juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "En juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En juin 1914.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -236631,12 +236673,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -236712,33 +236748,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1914",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1914",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1914",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En juin 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -236766,12 +236809,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -236841,33 +236878,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -236895,12 +236939,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -236964,33 +237002,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en 1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en 1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1881",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -237024,12 +237069,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -237093,33 +237132,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "cimetières des communes voisines ou créés à proximité des champs de bataille",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cimetières des communes voisines ou créés à proximité des champs de bataille",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur les champs de bataille",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -237159,12 +237205,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -237217,33 +237257,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "inhumés dans des cimetières des communes voisines ou créés à proximité des champs de bataille ou encore demeurèrent sans sépulture dans les lieux les plus inaccessibles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "inhumés dans des cimetières des communes voisines ou créés à proximité des champs de bataille ou encore demeurèrent sans sépulture dans les lieux les plus inaccessibles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "inhumés dans des cimetières des communes voisines ou créés à proximité des champs de bataille ou encore demeurèrent sans sépulture dans les lieux les plus inaccessibles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cimetières des communes voisines ou créés à proximité des champs de bataille ou encore demeurèrent sans sépulture dans les lieux les plus inaccessibles",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "sur le front",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Les militaires sont morts sur le front.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -237271,12 +237318,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -237347,33 +237388,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "alerte l' armée russe qui bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "alerte l' armée russe qui bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "alerte l' armée russe qui bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande.",
-                  "rougeL": 0.8695652173913044
+                  "rougeL": 0.8695652173913044,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "alarme l' armée russe qui bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande.",
-                  "rougeL": 0.9230769230769231
+                  "rougeL": 0.9230769230769231,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "alarte l' armée russe qui bloque puis repousse les deux autres corps d' armée entraînant la retraite allemande",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "La séquence de mots qui répond à la question est \"bloque puis repousse les deux autres corps d'armée\".",
-                  "rougeL": 0.48000000000000004
+                  "rougeL": 0.48000000000000004,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -237413,12 +237461,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -237476,33 +237518,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un résumé des nouvelles militaires qui pour la plupart sont déjà publiées dans la presse",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "llama-2_lora": {
                   "answer_pred": "un résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "GPT-3.5": {
                   "answer_pred": "Robertson donne aux membres un résumé des nouvelles militaires.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 0.85
                 }
               },
               "human_annot": {
@@ -237548,48 +237597,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -237922,33 +237929,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "nouvelles militaires qui pour la plupart sont déjà publiées dans la presse",
-                  "rougeL": 0.4444444444444444
+                  "rougeL": 0.4444444444444444,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "des nouvelles militaires qui pour la plupart sont déjà publiées dans la presse",
-                  "rougeL": 0.4444444444444444
+                  "rougeL": 0.4444444444444444,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "nouvelles militaires",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "des nouvelles militaires",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des nouvelles militaires qui pour la plupart sont déjà publiées dans la presse",
-                  "rougeL": 0.4444444444444444
+                  "rougeL": 0.4444444444444444,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "résumé des nouvelles militaires",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Robertson donne aux membres un résumé des nouvelles militaires.",
-                  "rougeL": 0.5454545454545454
+                  "rougeL": 0.5454545454545454,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -237958,12 +237972,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -238050,33 +238058,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le commandement de l'ANZAC",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238104,12 +238119,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -238172,33 +238181,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "le commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ANZAC",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "IIe ANZAC",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "commandement de l' ANZAC",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le commandement de l'ANZAC",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238208,12 +238224,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -238300,33 +238310,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "les Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "les Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Izvestias de Kronstadt",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238372,12 +238389,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -238428,33 +238439,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "soldats de toutes les puissances combattantes",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "ennemi",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' ennemi.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "soldats de toutes les puissances combattantes",
-                  "rougeL": 0.33333333333333337
+                  "rougeL": 0.33333333333333337,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "soldats",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "tous les soldats ou soldats de toutes les puissances combattantes",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238464,12 +238482,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -238573,33 +238585,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "pirates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "pirates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "François Deuve",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "pirates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "pirates et seigneurs de la guerre",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "pirates",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'Inconstant traque les pirates.",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -238639,12 +238658,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -238701,33 +238714,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Conseil général de l'Aisne",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238743,12 +238763,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -238823,33 +238837,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Aisne",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Aisne",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Conseil général de l' Aisne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Conseil général de l'Aisne",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -238871,12 +238892,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -238951,42 +238966,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "découper un lambeau de peau du bras à apposer sur le visage dans la plaie afin que celle-ci se ferme grâce à la peau fournie et à maintenir à l' aide d' une structure métallique",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "procédés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "des procédés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "cette méthode consistait à découper un lambeau de peau du bras à apposer sur le visage dans la plaie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "procédés",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "une méthode consistait à découper un lambeau de peau du bras à apposer sur le visage dans la plaie afin que celle-ci se ferme grâce à la peau fournie et à maintenir à l' aide d' une structure métallique le bras sanglant au visage afin de vasculariser la plaie pour qu' elle se referme",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "cette méthode",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -239085,33 +239101,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "transformé en hydravion et équipé de flotteurs pour l' aéronavale allemande, et désigné en tant que type L avec le numéro de série 1432.",
-                  "rougeL": 0.09999999999999999
+                  "rougeL": 0.09999999999999999,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la production des R.V I",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -239157,12 +239180,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -239213,33 +239230,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -239273,12 +239297,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -239335,33 +239353,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -239401,12 +239426,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -239463,33 +239482,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -239529,12 +239555,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -239585,33 +239605,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un modèle « French gendarme » sans qu' aucun élément ne vienne valider la réalité de cette variante",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "French gendarme",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un modèle « French gendarme »",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un modèle \"French gendarme\"",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -239645,12 +239672,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -239713,42 +239734,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "50e anniversaire du génocide arménien",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "50e anniversaire du génocide arménien",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour commémorer le 50e anniversaire du génocide arménien en 1965.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -239841,33 +239863,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "2 bataillons, 2 escadrons et 3 pièces d' artillerie",
-                  "rougeL": 0.37499999999999994
+                  "rougeL": 0.37499999999999994,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée.",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "2 bataillons, 2 escadrons et 3 pièces d' artillerie",
-                  "rougeL": 0.37499999999999994
+                  "rougeL": 0.37499999999999994,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la séquence de mots qui répond à la question est \"les troupes avancées\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -239889,12 +239918,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -239963,33 +239986,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie",
-                  "rougeL": 0.5555555555555556
+                  "rougeL": 0.5555555555555556,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée.",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d' artillerie en mouvement en direction de Dommartin - Lettrée.",
-                  "rougeL": 0.3846153846153846
+                  "rougeL": 0.3846153846153846,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un groupe formé de 2 bataillons, 2 escadrons et 3 pièces d'artillerie en mouvement en direction de Dommartin-Lettrée.",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240017,12 +240047,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -240103,33 +240127,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ioudenitch",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ioudenitch",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240151,12 +240182,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -240237,33 +240262,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ioudenitch",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Ioudenitch",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le général Ioudenitch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240285,12 +240317,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_3",
@@ -240365,33 +240391,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240419,12 +240452,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -240487,33 +240514,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le 9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 9 mars 1943",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240553,12 +240587,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -240615,33 +240643,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "L'article ne mentionne pas où la famille de Louis Murat a érigé un monument à sa mémoire.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -240675,12 +240710,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -240737,33 +240766,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à Lihons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240803,12 +240839,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -240865,33 +240895,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Gero",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Gero",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Gero",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Gero",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Gero, son fils aîné, leutnant au Grenadier - Regiment (mot.) 51",
-                  "rougeL": 0.25
+                  "rougeL": 0.25,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Gero",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Gero, son fils aîné.",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -240931,12 +240968,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -240990,33 +241021,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "santé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la santé, y compris la santé reproductive de nombreux animaux, et celle des humains qui les consomment",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "la santé, y compris la santé reproductive de nombreux animaux, et celle des humains qui les consomment",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "la santé",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "la santé, y compris la santé reproductive de nombreux animaux, et celle des humains qui les consomment.",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la santé, y compris la santé reproductive de nombreux animaux, et celle des humains qui les consomment",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la santé, y compris la santé reproductive de nombreux animaux, et celle des humains qui les consomment",
-                  "rougeL": 0.16666666666666669
+                  "rougeL": 0.16666666666666669,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -241056,12 +241094,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_6",
@@ -241118,33 +241150,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Maximilian von Spee décide de mener avec ses deux croiseurs cuirassés un combat retardateur désespéré.",
-                  "rougeL": 0.7333333333333334
+                  "rougeL": 0.7333333333333334,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -241178,12 +241217,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -241240,33 +241273,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "décide de mener avec ses deux croiseurs cuirassés un combat retardateur désespéré",
-                  "rougeL": 0.9565217391304348
+                  "rougeL": 0.9565217391304348,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Maximilian von Spee décide de mener avec ses deux croiseurs cuirassés un combat retardateur désespéré.",
-                  "rougeL": 0.7333333333333334
+                  "rougeL": 0.7333333333333334,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -241312,12 +241352,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -241368,42 +241402,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dans ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Dans ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Dans ce port sous contrôle britannique, on trouve des navires de guerre français et russes.",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -241490,33 +241525,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Dans ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Les Alliés",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "TsingTao",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "TsingTao",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "ce port sous contrôle britannique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans ce port sous contrôle britannique, en plus de nombreux navires marchands, on trouve des navires de guerre français et russes.",
-                  "rougeL": 0.47058823529411764
+                  "rougeL": 0.47058823529411764,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -241544,12 +241586,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -241618,33 +241654,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas de séquence de mots dans l'article qui répond à la question \"À quelle date l'ennemi est-il arrivé à Épernay ?\"",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -241660,12 +241703,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -241742,33 +241779,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "d'une part Châlons et d'autre part Épernay",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -241808,12 +241852,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -241866,42 +241904,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "15 juillet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "NULL",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Il n'y a pas d'information dans l'article concernant la date d'arrivée de l'ennemi à Épernay.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -242008,33 +242047,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "assiste avec Pétain à la démonstration du châssis Schneider",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pétain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Estienne.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -242056,12 +242102,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_2",
@@ -242142,33 +242182,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Pétain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Pétain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Pétain",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Estienne",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242202,12 +242249,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -242270,33 +242311,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1907.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242324,12 +242372,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -242393,33 +242435,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "En 1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "En 1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "En 1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "En 1907",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242459,12 +242508,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -242522,33 +242565,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "turques et grecques",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "turques et grecques",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "turques et grecques",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "(turques et grecques).",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "turques et grecques",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "populations mélangées (turques et grecques)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les populations mélangées (turques et grecques)",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242558,12 +242608,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -242650,33 +242694,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Lower Hutt en Nouvelle-Zélande.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242716,12 +242767,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -242797,33 +242842,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Maximilian von Weichs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "von Weichs",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "von Weichs",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "von Weichs",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Maximilian von Weichs",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "von Weichs",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "von Weichs",
-                  "rougeL": 0.7272727272727273
+                  "rougeL": 0.7272727272727273,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242851,12 +242903,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -242925,33 +242971,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dépotoir d' atelier,",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "des dizaines de rebuts de tôle de laiton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dépotoir d' atelier, découvert sur la ZAC Actiparc près d' Arras",
-                  "rougeL": 0.4210526315789473
+                  "rougeL": 0.4210526315789473,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "un dépotoir d'atelier",
-                  "rougeL": 0.6
+                  "rougeL": 0.6,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -242997,12 +243050,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -243049,33 +243096,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.9
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.9
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dépotoir d' atelier",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.9
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dépotoir d' atelier,",
-                  "rougeL": 0.888888888888889
+                  "rougeL": 0.888888888888889,
+                  "HScore": 0.85
                 },
                 "llama-2_lora": {
                   "answer_pred": "des dizaines de rebuts de tôle de laiton",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.25
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dépotoir d' atelier, découvert sur la ZAC Actiparc près d' Arras",
-                  "rougeL": 0.4210526315789473
+                  "rougeL": 0.4210526315789473,
+                  "HScore": 0.85
                 },
                 "GPT-3.5": {
                   "answer_pred": "des fouilles archéologiques récentes ont permis de retrouver les traces d'un dépotoir d'atelier",
-                  "rougeL": 0.4444444444444444
+                  "rougeL": 0.4444444444444444,
+                  "HScore": 0.9
                 }
               },
               "human_annot": {
@@ -243205,48 +243259,6 @@
                     "rating": "Erreur inacceptable"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -243515,33 +243527,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Le gouvernement a du mal à maîtriser ce sentiment auprès d' un nombre toujours croissant de patriotes et d' interventionnistes",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "l' Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "l' Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' Italie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'Italie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -243557,12 +243576,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -243644,33 +243657,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le 26 octobre 1914.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -243710,12 +243730,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -243766,33 +243780,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "26 octobre 1914",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 26 octobre 1914.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -243826,12 +243847,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -243912,33 +243927,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -243984,12 +244006,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_2",
@@ -244054,33 +244070,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Belges",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Algériens, le 2e régiment de tirailleurs Algériens, le 1er régiment de tirailleurs Algériens, le 2e régiment de Zouaves, des Belges et des Canadiens",
-                  "rougeL": 0.06896551724137932
+                  "rougeL": 0.06896551724137932,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Les Allemands",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "5200 soldats",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Belges et Canadiens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "les troupes alliées",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.0
                 }
               },
               "human_annot": {
@@ -244114,12 +244137,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur inacceptable"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -244184,33 +244201,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Courlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lettons, en favorisant l' installation de Germano - Russes venant du domaine iméprial russe, de domaines du clergé et de grands propriétaires terriens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "l' installation de Germano - Russes venant du domaine iméprial russe, de domaines du clergé et de grands propriétaires terriens",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Lettons",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Courlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Courlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "les nouveaux États",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -244238,12 +244262,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -244313,33 +244331,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "novembre 1919",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Suite aux élections de novembre 1919, le Bloc National s'avère être le grand gagnant.",
-                  "rougeL": 0.3076923076923077
+                  "rougeL": 0.3076923076923077,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -244367,12 +244392,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -244441,33 +244460,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -244501,12 +244527,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -244563,33 +244583,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Robertson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Robertson.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -244623,12 +244650,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_4",
@@ -244691,33 +244712,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vitry - le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.85
                 },
                 "GPT-3.5": {
                   "answer_pred": "Adolphe Guillaumat combat à Vitry-le-François lors de la première bataille de la Marne.",
-                  "rougeL": 0.27272727272727276
+                  "rougeL": 0.27272727272727276,
+                  "HScore": 0.9
                 }
               },
               "human_annot": {
@@ -244889,48 +244917,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Partiellement correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Erreur acceptable (\"humaine\")"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -245099,12 +245085,6 @@
                     "rating": "Erreur acceptable (\"humaine\")"
                   }
                 ],
-                "question": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Question douteuse"
-                  }
-                ],
                 "no_answer": [
                   {
                     "annot": "annot_10",
@@ -245149,33 +245129,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Vitry -le - François",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Adolphe Guillaumat combat à Vitry-le-François lors de la première bataille de la Marne.",
-                  "rougeL": 0.27272727272727276
+                  "rougeL": 0.27272727272727276,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -245203,12 +245190,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -245277,33 +245258,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "armée belge emmenée par le roi Albert Ier inflige donc un grave échec à l' armée impériale allemande au tout début de la campagne",
-                  "rougeL": 0.9189189189189189
+                  "rougeL": 0.9189189189189189,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armée impériale allemande",
-                  "rougeL": 0.3
+                  "rougeL": 0.3,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "L' armée belge emmenée par le roi Albert Ier inflige donc un grave échec à l' armée impériale allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "L' armée belge emmenée par le roi Albert Ier inflige donc un grave échec à l' armée impériale allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "le 4 août 1914",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "l' armée belge emmenée par le roi Albert Ier inflige donc un grave échec à l' armée impériale allemande",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "que l'armée belge emmenée par le roi Albert Ier inflige un grave échec à l'armée impériale allemande.",
-                  "rougeL": 0.7999999999999999
+                  "rougeL": 0.7999999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -245337,12 +245325,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -245406,33 +245388,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -245688,48 +245677,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_2",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  },
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -245852,33 +245799,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "12 décembre 1941",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 12 décembre 1941.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -245918,12 +245872,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -245980,33 +245928,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une charrette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246046,12 +246001,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -246103,33 +246052,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "charrette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "charrette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "charrette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "charrette",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans une charrette de ses parents",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246151,12 +246107,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_8",
@@ -246232,33 +246182,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Le 13 novembre",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246298,12 +246255,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -246398,33 +246349,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246446,12 +246404,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -246558,33 +246510,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "adjudant",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Konrad Krafft von Dellmensingen",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246600,12 +246559,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -246688,33 +246641,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "catalyseur",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "catalyseur",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "comme catalyseur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "comme catalyseur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "comme catalyseur",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Verdun agit comme catalyseur",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Verdun agit comme catalyseur.",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246724,12 +246684,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -246828,33 +246782,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Au Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -246882,12 +246843,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -246962,33 +246917,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "le Kaiser",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247022,12 +246984,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -247090,33 +247046,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Abraham Bloch Abraham Bloch",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Abraham Bloch",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247144,12 +247107,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -247215,33 +247172,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Wilson.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Wilson",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247275,12 +247239,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_5",
@@ -247385,33 +247343,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lord Kitchener",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Smith - Dorrien participe à la bataille de Gennis.",
-                  "rougeL": 0.7142857142857143
+                  "rougeL": 0.7142857142857143,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247439,12 +247404,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_10",
@@ -247549,33 +247508,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Lord Kitchener",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Lord Kitchener",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Smith - Dorrien",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247597,12 +247563,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -247677,33 +247637,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "à la veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "à la veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à la veille de ses congrès",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "\"On la fait dire des offices religieux à la veille de ses congrès à la fois à l'Église, au Temple et à la Synagogue.\"",
-                  "rougeL": 0.2857142857142857
+                  "rougeL": 0.2857142857142857,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247719,12 +247686,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -247806,33 +247767,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Baltique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Baltique",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -247878,12 +247846,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_5",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_5",
@@ -247925,42 +247887,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "au travers de l' entrée du golfe de Finlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "au travers de l' entrée du golfe de Finlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "au travers de l' entrée du golfe de Finlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "au travers de l' entrée du golfe de Finlande.",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "au travers de l' entrée du golfe de Finlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "derrière les barrages de mines qui s' étendaient au travers de l' entrée du golfe de Finlande",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "dans la mer Baltique",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -248050,33 +248013,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Zweig",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Zweig",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Zweig",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Zweig",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -248086,12 +248056,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_7",
@@ -248172,33 +248136,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Zweig",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Stefan Zweig",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Stefan Zweig.",
-                  "rougeL": 0.7499999999999999
+                  "rougeL": 0.7499999999999999,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -248238,12 +248209,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -248300,33 +248265,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "en arrière.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en arrière",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -248360,12 +248332,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_8",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_8",
@@ -248470,42 +248436,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Victor",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Victor Goybet.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -248636,33 +248603,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Victor Goybet",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Victor",
-                  "rougeL": 0.5714285714285715
+                  "rougeL": 0.5714285714285715,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "Victor Goybet.",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -248708,12 +248682,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_4",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_4",
@@ -248766,33 +248734,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "armistice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "armistice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "armistice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "armistice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "armistice",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "demande d' armistice",
-                  "rougeL": 0.8
+                  "rougeL": 0.8,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "l'armistice",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -248838,12 +248813,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -248894,42 +248863,43 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "À la fin",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fin de l' ouvrage",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fin",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fin",
-                  "rougeL": 0.6666666666666666
+                  "rougeL": 0.6666666666666666,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la fin du livre",
-                  "rougeL": 0.5
+                  "rougeL": 0.5,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "À la fin de l'ouvrage",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à la fin de l'ouvrage",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_6",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_6",
@@ -249022,33 +248992,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Marc Pourpe",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249082,12 +249059,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_3",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_3",
@@ -249150,33 +249121,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "à la fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "à la fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "à la fin du XIXe siècle",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249186,12 +249164,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_1",
@@ -249278,33 +249250,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "dans un pays qui manquait presque tout, après des siècles de domination ottomane",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "dans un pays qui manquait presque tout",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "dans un pays qui manquait presque tout",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "dans un pays qui manquait presque tout, après des siècles de domination ottomane",
-                  "rougeL": 0.6153846153846153
+                  "rougeL": 0.6153846153846153,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249338,12 +249317,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_1",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_1",
@@ -249400,33 +249373,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "Italie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "en Albanie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249454,12 +249434,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_9",
@@ -249528,33 +249502,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "pour commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "commémorer le 50e anniversaire du génocide arménien",
-                  "rougeL": 0.9333333333333333
+                  "rougeL": 0.9333333333333333,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 0.8571428571428571
+                  "rougeL": 0.8571428571428571,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "50e anniversaire du génocide arménien",
-                  "rougeL": 0.7692307692307693
+                  "rougeL": 0.7692307692307693,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "pour commémorer le 50e anniversaire du génocide arménien en 1965",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249564,12 +249545,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_7",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_7",
@@ -249686,33 +249661,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "sergent",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "Ernest Jules Joseph Maunoury",
-                  "rougeL": 0.4
+                  "rougeL": 0.4,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249752,12 +249734,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",
@@ -249814,33 +249790,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "llama-2_lora": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 1.0
                 }
               },
               "human_annot": {
@@ -249874,12 +249857,6 @@
                     "rating": "Correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_9",
-                    "rating": "Correct"
-                  }
-                ],
                 "llama-2_lora": [
                   {
                     "annot": "annot_9",
@@ -249937,33 +249914,40 @@
                 }
               ],
               "predictions": {
-                "MT5-large_260_AP0": {
+                "MT5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "T5-large_260_AP0": {
+                "T5-large": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
-                "FLAN-T5-large_260_AP0": {
+                "FLAN-T5-large": {
                   "answer_pred": "italien et russe",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "Camembert_baseline": {
                   "answer_pred": "Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "llama-2_lora": {
                   "answer_pred": "Italie",
-                  "rougeL": 0.0
+                  "rougeL": 0.0,
+                  "HScore": 0.5
                 },
                 "mixtral-8x7b": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 },
                 "GPT-3.5": {
                   "answer_pred": "la Russie",
-                  "rougeL": 1.0
+                  "rougeL": 1.0,
+                  "HScore": 0.5
                 }
               },
               "human_annot": {
@@ -250003,12 +249987,6 @@
                     "rating": "Partiellement correct"
                   }
                 ],
-                "llama-2-70b": [
-                  {
-                    "annot": "annot_10",
-                    "rating": "Partiellement correct"
-                  }
-                ],
                 "mixtral-8x7b": [
                   {
                     "annot": "annot_10",