diff --git a/examples/json_save.ipynb b/examples/json_save.ipynb index 61db044db842d9e54fd1ed9cfb5ce9cfb4f5b629..03094aa242234a18b120b31afcebbb141204aee7 100644 --- a/examples/json_save.ipynb +++ b/examples/json_save.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -22,7 +22,7 @@ " smooth_method='none', sparse=True, version='classic')" ] }, - "execution_count": 1, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -39,16 +39,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "Automaton.write(sp.automaton, train_file + \".json\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -66,33 +57,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "str2 = Serializer.data_to_json(A)" ] }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "str1 == str2" - ] - }, { "cell_type": "code", "execution_count": 7, @@ -101,7 +72,7 @@ { "data": { "text/plain": [ - "'{\"automaton\": {\"nbL\": 4, \"nbS\": 5, \"initial\": {\"numpy.ndarray\": {\"values\": [-0.0004934419970497512, 0.0030634697107912346, -0.044073932015580415, -0.1077770261654714, -0.0866391379316952], \"dtype\": \"float64\"}}, \"final\": {\"numpy.ndarray\": {\"values\": [0.07757136847945045, -0.024220294003132026, -0.4468125366321221, 0.627732084089759, -0.554674433356224], \"dtype\": \"float64\"}}, \"transitions\": [{\"numpy.ndarray\": {\"values\": [[0.04512120959511772, -0.24038969827844062, 0.34944999592135334, -0.2811680730534579, -0.21402523377497645], [0.0692580056243761, -0.30062293462829204, 0.20641375368520157, -0.14960814319756124, -0.5580573163749153], [0.02980115192176571, -0.13866480809160409, 0.18362212572805459, -0.20969545230657607, -0.14481622025561292], [0.005699344003198349, -0.023385825120201414, -0.06600665373981851, 0.10749935271466007, -0.15103654604159977], [-0.02008655193147911, 0.09026347555230492, -0.005525585655539262, -0.031355317090308935, 0.2432902242047721]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[0.0774477207917058, 0.09007073705762021, -0.3047220063293013, 0.2767624549859105, 0.20289396030628148], [-0.09902980483670844, -0.08061846818727973, 0.25853170692250554, -0.12086330214608881, -0.11085207725068251], [-0.061710792028537534, -0.06244151779954751, 0.12007654564862075, 0.0025063746277943564, -0.1567967473145572], [-0.002736973749965403, -0.009005721984277787, -0.00046003295909181354, -0.008550426472005344, -0.053754646789681754], [0.030987327588710728, 0.03972680066723246, -0.04997113350910248, 0.0035769411874962344, 0.1418257620585633]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[-0.06791915236220235, -0.11357937659088102, 0.37955392604054394, -0.21784979894046635, -0.22977695089938127], [0.11596642335411328, 0.14914956804629287, -0.13357508376686902, -0.008916063072034974, 0.3484153673774836], [0.011730817547426673, 0.019273800531955612, 0.0414265834586712, -0.035346588560982, 0.02316491010895583], [0.007328911075541707, 
0.005536509132796312, -0.022456082950666856, 0.03611543477693187, -0.038514339001406585], [-0.010589894686551544, -0.010626616553723532, -0.000543105645661794, -0.025567476700160314, 0.04984888818929034]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[0.07276211427780357, -0.0157195576855797, 0.07428592814590385, -0.10369861539249735, 0.024753473688328077], [-0.05607105449779142, -0.08896207276035666, 0.27638225397521243, -0.2371125582838589, 0.07372294122306285], [-0.007391294007753122, -0.048741797963875705, -0.6291239733858526, 0.46816276521577677, 0.09251699239093385], [-0.007110224931878467, -0.05623317735898056, -0.36606658567620365, -0.013297798115225407, 0.6491033177492604], [0.002335515008556511, -0.021561151264484414, 0.09096243479437888, -0.38438823493062646, 0.6616477207948602]], \"dtype\": \"float64\"}}], \"type\": \"classic\"}}'" + "True" ] }, "execution_count": 7, @@ -110,33 +81,13 @@ } ], "source": [ - "str1" + "str1 == str2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'{\"automaton\": {\"nbL\": 4, \"nbS\": 5, \"initial\": {\"numpy.ndarray\": {\"values\": [-0.0004934419970497512, 0.0030634697107912346, -0.044073932015580415, -0.1077770261654714, -0.0866391379316952], \"dtype\": \"float64\"}}, \"final\": {\"numpy.ndarray\": {\"values\": [0.07757136847945045, -0.024220294003132026, -0.4468125366321221, 0.627732084089759, -0.554674433356224], \"dtype\": \"float64\"}}, \"transitions\": [{\"numpy.ndarray\": {\"values\": [[0.04512120959511772, -0.24038969827844062, 0.34944999592135334, -0.2811680730534579, -0.21402523377497645], [0.0692580056243761, -0.30062293462829204, 0.20641375368520157, -0.14960814319756124, -0.5580573163749153], [0.02980115192176571, -0.13866480809160409, 0.18362212572805459, -0.20969545230657607, -0.14481622025561292], [0.005699344003198349, -0.023385825120201414, -0.06600665373981851, 0.10749935271466007, -0.15103654604159977], [-0.02008655193147911, 0.09026347555230492, -0.005525585655539262, -0.031355317090308935, 0.2432902242047721]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[0.0774477207917058, 0.09007073705762021, -0.3047220063293013, 0.2767624549859105, 0.20289396030628148], [-0.09902980483670844, -0.08061846818727973, 0.25853170692250554, -0.12086330214608881, -0.11085207725068251], [-0.061710792028537534, -0.06244151779954751, 0.12007654564862075, 0.0025063746277943564, -0.1567967473145572], [-0.002736973749965403, -0.009005721984277787, -0.00046003295909181354, -0.008550426472005344, -0.053754646789681754], [0.030987327588710728, 0.03972680066723246, -0.04997113350910248, 0.0035769411874962344, 0.1418257620585633]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[-0.06791915236220235, -0.11357937659088102, 0.37955392604054394, -0.21784979894046635, -0.22977695089938127], [0.11596642335411328, 0.14914956804629287, -0.13357508376686902, -0.008916063072034974, 0.3484153673774836], [0.011730817547426673, 0.019273800531955612, 0.0414265834586712, -0.035346588560982, 0.02316491010895583], [0.007328911075541707, 0.005536509132796312, -0.022456082950666856, 0.03611543477693187, -0.038514339001406585], [-0.010589894686551544, -0.010626616553723532, -0.000543105645661794, -0.025567476700160314, 0.04984888818929034]], \"dtype\": \"float64\"}}, {\"numpy.ndarray\": {\"values\": [[0.07276211427780357, -0.0157195576855797, 0.07428592814590385, -0.10369861539249735, 0.024753473688328077], [-0.05607105449779142, -0.08896207276035666, 
0.27638225397521243, -0.2371125582838589, 0.07372294122306285], [-0.007391294007753122, -0.048741797963875705, -0.6291239733858526, 0.46816276521577677, 0.09251699239093385], [-0.007110224931878467, -0.05623317735898056, -0.36606658567620365, -0.013297798115225407, 0.6491033177492604], [0.002335515008556511, -0.021561151264484414, 0.09096243479437888, -0.38438823493062646, 0.6616477207948602]], \"dtype\": \"float64\"}}], \"type\": \"classic\"}}'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "str2" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, "outputs": [], "source": [ "str3 = Serializer.data_to_yaml(sp.automaton)" @@ -144,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -226,6 +177,15 @@ "print(str3)" ] }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "Automaton.write(sp.automaton, train_file + \".json\")" + ] + }, { "cell_type": "code", "execution_count": 11, @@ -331,249 +291,59 @@ "Ayl.transitions" ] }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "from splearn import Hankel\n", - "Hankel.write(sp.hankel, train_file + \"_hankel.json\", \"json\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "Hb = Hankel.read(train_file + \"_hankel.json\", \"json\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hankel equality check\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hb == sp.hankel" - ] - }, { "cell_type": "code", "execution_count": 19, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 8251 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 2199 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 2122 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 1091 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 3489 stored elements in Dictionary Of Keys format>]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hb.lhankel" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 8251 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 2199 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 2122 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 1091 stored elements in Dictionary Of Keys format>,\n", - " <1310x3308 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 3489 stored elements in 
Dictionary Of Keys format>]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sp.hankel.lhankel" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy.sparse as sps\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "data = sp.hankel.lhankel[0]\n", - "k_str = \"({0:d},{1:d})\"\n", - "dico = dict(zip([k_str.format(i, j) for (i,j) in data.keys()], data.values()))" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "dok = sps.dok_matrix(data.shape, dtype=data.dtype)\n", - "for k, val in dico.items():\n", - " k = k.replace(\"(\",\"\").replace(\")\",\"\")\n", - " ind1, ind2 = k.split(\",\")\n", - " dok[(int(ind1), int(ind2))] = val" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " (0, 2950)\t6.0\n", - " (1, 1141)\t6.0\n", - " (2, 820)\t6.0\n", - " (9, 192)\t6.0\n", - " (35, 75)\t6.0\n", - " (123, 12)\t6.0\n", - " (358, 4)\t6.0\n", - " (832, 0)\t6.0\n", - " (0, 20)\t551.0\n", - " (1, 4)\t551.0\n", - " (5, 0)\t551.0\n", - " (0, 837)\t9.0\n", - " (1, 212)\t9.0\n", - " (4, 33)\t9.0\n", - " (14, 17)\t9.0\n", - " (56, 1)\t9.0\n", - " (183, 0)\t9.0\n", - " (0, 254)\t7.0\n", - " (1, 25)\t7.0\n", - " (2, 9)\t7.0\n", - " (7, 1)\t7.0\n", - " (26, 0)\t7.0\n", - " (0, 3160)\t5.0\n", - " (1, 1601)\t5.0\n", - " (5, 323)\t5.0\n", - " :\t:\n", - " (607, 109)\t1.0\n", - " (1270, 48)\t1.0\n", - " (34, 2382)\t1.0\n", - " (117, 1262)\t1.0\n", - " (336, 580)\t1.0\n", - " (761, 265)\t1.0\n", - " (464, 3272)\t1.0\n", - " (1015, 1821)\t1.0\n", - " (338, 2911)\t1.0\n", - " (770, 1090)\t1.0\n", - " (0, 2926)\t1.0\n", - " (1, 1113)\t1.0\n", - " (2, 767)\t1.0\n", - " (9, 131)\t1.0\n", - " (34, 70)\t1.0\n", - " (119, 7)\t1.0\n", - " (343, 3)\t1.0\n", - " (786, 0)\t1.0\n", - " (1073, 2555)\t1.0\n", - " (0, 825)\t1.0\n", - " (1, 197)\t1.0\n", - " (3, 80)\t1.0\n", - " (13, 17)\t1.0\n", - " (53, 1)\t1.0\n", - " (175, 0)\t1.0\n" + "True\n", + "True\n", + "True\n", + "True\n" ] } ], "source": [ - "print(dok)" + "import numpy as np\n", + "for i in range(4):\n", + " print(np.array_equal(Ajs.transitions[i], Ayl.transitions[i]))" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "from splearn import Hankel\n", - "Hankel.write(sp.hankel, train_file + \"_hankel.yaml\", \"yaml\")" + "Hankel.write(sp.hankel, train_file + \"_hankel.json\", \"json\")" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "Hb = Hankel.read(train_file + \"_hankel.yaml\", \"yaml\")" + "Hb = Hankel.read(train_file + \"_hankel.json\", \"json\")" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hankel equality check\n" - ] - }, { "data": { "text/plain": [ "True" ] }, - "execution_count": 27, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -584,102 +354,40 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "yamlstr = \"- scipy.dok_matrix:\\n dtype: float64\\n shape:\\n tuple: [1, 1]\\n values: {'(0,0)': 1.0}\"" - ] - }, - { 
- "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "- scipy.dok_matrix:\n", - " dtype: float64\n", - " shape:\n", - " tuple: [1, 1]\n", - " values: {'(0,0)': 1.0}\n" - ] - } - ], - "source": [ - "print(yamlstr)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[<1x1 sparse matrix of type '<class 'numpy.float64'>'\n", - "\twith 1 stored elements in Dictionary Of Keys format>]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Serializer.yaml_to_data(yamlstr)" + "Hankel.write(sp.hankel, train_file + \"_hankel.yaml\", \"yaml\")" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 24, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\"- scipy.dok_matrix:\\\\n dtype: float64\\\\n shape:\\\\n tuple: [1, 1]\\\\n values:\\\\\\n \\\\ {\\'(0,0)\\': 1.0}\"\\n'" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "dy" + "Hb = Hankel.read(train_file + \"_hankel.yaml\", \"yaml\")" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "False" + "True" ] }, - "execution_count": 47, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "issubclass(TypeError, ValueError)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"The input data string (\" + data_str + \") should contain the following keys : \\\"\" + '\\\", \\\"'.join(keys) + \"\\\"\"" + "Hb == sp.hankel" ] } ], diff --git a/splearn/datasets/__init__.py b/splearn/datasets/__init__.py index 1b09d4a13650cc352a8e4b04a592576072d7e6b2..f118dff034cf7297a0b143ec45dbbf186ffdeb80 100644 --- a/splearn/datasets/__init__.py +++ b/splearn/datasets/__init__.py @@ -1,2 +1,2 @@ from splearn.datasets.base import * -from splearn.datasets.data_sample import DataSample, Splearn_array \ No newline at end of file +from splearn.datasets.data_sample import DataSample, SplearnArray \ No newline at end of file diff --git a/splearn/datasets/base.py b/splearn/datasets/base.py index 6693e2dba4d812fe559df53939dcdc1440ca8315..d7781bdd40e5dfc92641d3bd6c0a4790e511552e 100644 --- a/splearn/datasets/base.py +++ b/splearn/datasets/base.py @@ -3,38 +3,25 @@ import numpy as np from splearn.datasets.data_sample import DataSample -def load_data_sample(adr, type='SPiCe', pickle=False): +def load_data_sample(adr, filetype='SPiCe', pickle=False): """Load a sample from file and returns a dictionary (word,count) - Input: - :param lrows: number or list of rows, - a list of strings if partial=True; - otherwise, based on pref if version="classic" or - "prefix", fact otherwise - :type lrows: int or list of int - :param lcolumns: number or list of columns - a list of strings if partial=True ; - otherwise, based on suff if version="classic" or "suffix", - fact otherwise - :type lcolumns: int or list of int - :param string version: (default = "classic") version name - :param boolean partial: (default value = False) build of partial - if True partial dictionaries are loaded based - on nrows and lcolumns + :param str adr: address and name of the loaded file + :param str filetype: (default value = 'SPiCe') indicate + the structure of the file. 
Should be either 'SPiCe' or 'Pautomac' + :param boolean pickle: if enabled, a pickle file is created from the loaded file. Default is False. - Output: - :returns: nbL , nbEx , dsample , dpref , dsuff , dfact - :rtype: int , int , dict , dict , dict , dict + :returns: corresponding DataSample + :rtype: DataSample :Example: - Let's say you are interested in the samples 10, 25, and 50, and want to - know their class name. - >>> from splearn.datasets.base import load_data_sample >>> from splearn.tests.datasets.get_dataset_path import get_dataset_path >>> train_file = '3.pautomac_light.train' # '4.spice.train' @@ -54,13 +41,13 @@ def load_data_sample(adr, type='SPiCe', pickle=False): """ - if type == 'SPiCe' or type == 'Pautomac': + if filetype == 'SPiCe' or filetype == 'Pautomac': data = _load_file_doublelecture(adr=adr, pickle=pickle) return DataSample(data=data) def _load_file_doublelecture(adr, pickle=False): dsample = {} # dictionary (word,count) - nb_sample, max_length = _read_dimension(adr=adr) + _, max_length = _read_dimension(adr=adr) f = open(adr, "r") line = f.readline() l = line.split() @@ -107,49 +94,6 @@ def _read_dimension(adr): "do not match number of samples " + str(nb_sample)) return nb_sample , max_length -# def _load_file_1lecture(adr, pickle=False): -# dsample = {} # dictionary (word,count) -# f = open(adr, "r") -# line = f.readline() -# l = line.split() -# nbEx = int(l[0]) -# nbL = int(l[1]) -# line = f.readline() -# data1 = np.zeros((0,0)) -# length = 0 -# while line: -# l = line.split() -# # w = () if int(l[0]) == 0 else tuple([int(x) for x in l[1:]]) -# # dsample[w] = dsample[w] + 1 if w in dsample else 1 -# # traitement du mot vide pour les préfixes, suffixes et facteurs -# w = [] if int(l[0]) == 0 else [int(x) for x in l[1:]] -# word = np.array(w, ndmin=2, dtype=np.uint32) -# diff = abs(int(l[0]) - length) -# if len(w) > length and not np.array_equal(data1, np.zeros((0,0))): -# data1 = _add_empty(data1, diff) -# elif word.shape[0] < length and not np.array_equal(data1, np.zeros((0,0))): -# word = _add_empty(word, diff) -# -# if np.array_equal(data1, np.zeros((0,0))): -# data1 = word -# else: -# data1 = np.concatenate((data1, word), axis=0) -# length = data1.shape[1] -# line = f.readline() -# -# f.close() -# if pickle: -# _create_pickle_files(adr=adr, dsample=dsample) -# return nbL, nbEx, data1 - - -# def _add_empty(data, diff): -# empty = np.zeros((data.shape[0], diff)) -# empty += -1 -# data = np.concatenate((data, empty), axis=1) -# return data - - def _create_pickle_files(self, adr, dsample): f = open(adr + ".sample.pkl", "wb") pickle.dump(dsample, f) diff --git a/splearn/datasets/data_sample.py b/splearn/datasets/data_sample.py index 3122630e07138773178dcb3f6ee76fca2a19041b..0c9f31f9b88ad66e62d7f013340b523f3355d398 100644 --- a/splearn/datasets/data_sample.py +++ b/splearn/datasets/data_sample.py @@ -33,29 +33,58 @@ # # # ######### COPYRIGHT ######### -"""This module contains the DataSample class and Splearn_array class -The DataSample class encapsulates a sample 's components -nbL and nbEx numbers, -Splearn_array class inherit from numpy ndarray and contains a 2d data ndarray -with the shape +"""This module contains the DataSample class and SplearnArray class.
-==== ==== ==== ==== ==== -x x x x -1 -x x x x x -x x -1 -1 -1 -x -1 -1 -1 -1 --1 -1 -1 -1 -1 -==== ==== ==== ==== ==== -where -1 a indicates a empty cell, -the number nbL and nbEx and , the fourth dictionaries for sample, -prefix, suffix and factor where they are computed """ import numpy as np -class Splearn_array(np.ndarray): - """Splearn_array inherit from numpy ndarray +class SplearnArray(np.ndarray): + """Sample data array used for splearn spectral estimation + + The **SplearnArray** class inherits from numpy ndarray as a 2d data ndarray. + + Example of a possible 2d shape: + + +---+---+---+---+---+ + | 0| 1| 0| 3| -1| + +---+---+---+---+---+ + | 0| 0| 3| 3| 1| + +---+---+---+---+---+ + | 1| 1| -1| -1| -1| + +---+---+---+---+---+ + | 5| -1| -1| -1| -1| + +---+---+---+---+---+ + | -1| -1| -1| -1| -1| + +---+---+---+---+---+ + + is equivalent to: + + - word (0103) or abad + - word (00331) or aaddb + - word (11) or bb + - word (5) or f + - word () or empty + + Each row represents a word of the sample. The words are represented by integer letters (0->a, 1->b, 2->c ...). + -1 indicates the end of the word. The number of rows is the total number of words in the sample (=nbEx) and the number of columns + is given by the size of the longest word. Note that duplicated words are not merged: + if a word appears twice in the sample, it is counted as two different examples. + + The DataSample class also encapsulates the sample's parameters 'nbL', 'nbEx' (number of letters in the alphabet and + number of samples) and the four dictionaries 'sample', 'prefix', 'suffix' and 'factor' that will be populated during the fit + estimations. + + - Input: + + :param ndarray input_array: input ndarray that will be converted into **SplearnArray** + :param int nbL: the number of letters + :param int nbEx: total number of examples. + :param dict sample: the keys are the words and the values are the number of times each word appears in the sample. + :param dict pref: the keys are the prefixes and the values are the number of times each prefix appears in the sample. + :param dict suff: the keys are the suffixes and the values are the number of times each suffix appears in the sample. + :param dict fact: the keys are the factors and the values are the number of times each factor appears in the sample.
:Example: @@ -66,7 +95,7 @@ class Splearn_array(np.ndarray): >>> print(data.__class__) >>> data.data <class 'splearn.datasets.data_sample.DataSample'> - GSplearn_array([[ 3., 0., 3., ..., -1., -1., -1.], + SplearnArray([[ 3., 0., 3., ..., -1., -1., -1.], [ 3., 3., -1., ..., -1., -1., -1.], [ 3., 2., 0., ..., -1., -1., -1.], ..., @@ -96,150 +125,15 @@ class Splearn_array(np.ndarray): self.suff = getattr(obj, 'suff', None) self.fact = getattr(obj, 'fact', None) - # def select_rows(self, nb_rows_max=1000, version='classic'): - # """define lrows - # - # - Input: - # - # :param int nb_rows_max: (default = 1000) number of maximum rows - # :param string version: (default = "classic") version name - # - # - Output: - # - # :returns: list lrows, list of rows - # :rtype: list - # """ - # lRows = [] # liste à renvoyer - # nbRows = 0 - # lLeafs = [([], self.nbEx )] - # # pref[()]la liste de couples (prefixes frontières, nb occ) - # # initialisée au prefixe vide - # if version == 'classic': - # while lLeafs and nbRows < nb_rows_max: - # lastWord = lLeafs.pop()[ - # 0] # le prefixe frontière le plus fréquent - # lRows.append(tuple(lastWord)) - # nbRows += 1 - # for i in range(self.nbL): - # newWord = lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.pref: - # # ajout d'un nouveau prefixe frontière - # lLeafs.append((newWord, self.pref[tnewWord])) - # lLeafs = sorted(lLeafs, key=lambda x: x[1]) - # elif version == 'prefix': - # while lLeafs and nbRows < nb_rows_max: - # lastWord = lLeafs.pop()[ - # 0] # le prefixe frontière le plus fréquent - # lRows.append(tuple(lastWord)) - # nbRows += 1 - # for i in range(self.nbL): - # newWord = lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.pref: - # # ajout d'un nouveau prefixe frontière - # nb = 0 - # for u in self.sample: - # if tnewWord <= u: - # nb += self.sample[u] * ( - # len(u) - len(tnewWord) + 1) - # lLeafs.append((newWord, nb)) - # lLeafs = sorted(lLeafs, key=lambda x: x[1]) - # elif version == 'factor': - # while lLeafs and nbRows < nb_rows_max: - # lastWord = lLeafs.pop()[ - # 0] # le prefixe frontière le plus fréquent - # lRows.append(tuple(lastWord)) - # nbRows += 1 - # for i in range(self.nbL): - # newWord = lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.fact: - # # ajout d'un nouveau prefixe frontière - # nb = 0 - # lw = len(tnewWord) - # for u in self.sample: - # if len(u) >= lw: - # for i in range(lw, len(u) + 1): - # if u[:i][-lw:] == tnewWord: - # nb += self.sample[u] * (len(u) - i + 1) - # lLeafs.append((newWord, nb)) - # lLeafs = sorted(lLeafs, key=lambda x: x[1]) - # # print(lLeafs) - # return lRows - - # def select_columns(self, nb_columns_max=1000, version='classic'): - # """define lcolumns - # - # - Input: - # - # :param int nb_columns_max: (default = 1000) number of maximum columns - # :param string version: (default = "classic") version name - # - # - Output: - # - # :returns:list lcolumns, list of columns - # :rtype: list - # """ - # lColumns = [] # liste à renvoyer - # lLeafs = [([], self.nbEx)] # la liste de couples (suffixes frontières, - # # nb occ) initialisée au suffixe vide - # - # nbColumns = 0 - # if version == 'classic': - # while lLeafs and nbColumns < nb_columns_max: - # lastWord = lLeafs.pop()[ - # 0] # le suffixe frontière le plus fréquent - # lColumns.append(tuple(lastWord)) - # nbColumns += 1 - # for i in range(self.nbL): - # newWord = 
lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.suff: - # # ajout d'un nouveau suffixe frontière - # lLeafs.append((newWord, self.suff[tnewWord])) - # lLeafs = sorted(lLeafs, key=lambda x: x[ - # 1]) # suffixe le plus fréquent en dernier - # # print(lLeafs) - # elif version == 'prefix': - # while lLeafs and nbColumns < nb_columns_max: - # lastWord = lLeafs.pop()[ - # 0] # le prefixe frontière le plus fréquent - # lColumns.append(tuple(lastWord)) - # nbColumns += 1 - # for i in range(self.nbL): - # newWord = lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.fact: - # # ajout d'un nouveau suffixe frontière - # lLeafs.append((newWord, self.fact[tnewWord])) - # lLeafs = sorted(lLeafs, key=lambda x: x[1]) - # elif version == 'factor': - # while lLeafs and nbColumns < nb_columns_max: - # lastWord = lLeafs.pop()[ - # 0] # le prefixe frontière le plus fréquent - # lColumns.append(tuple(lastWord)) - # nbColumns += 1 - # for i in range(self.nbL): - # newWord = lastWord + [i] # successeur de lastword - # tnewWord = tuple(newWord) # tuple associé - # if tnewWord in self.fact: - # # ajout d'un nouveau prefixe frontière - # nb = 0 - # lw = len(tnewWord) - # for u in self.sample: - # if len(u) >= lw: - # for i in range(lw, len(u) + 1): - # if u[:i][-lw:] == tnewWord: - # nb += self.sample[u] * (i - lw + 1) - # lLeafs.append((newWord, nb)) - # lLeafs = sorted(lLeafs, key=lambda x: x[1]) - # # print(lLeafs) - # return lColumns - class DataSample(dict): """ A DataSample instance + - Input: + + :param tuple data: a tuple of (int, int, numpy.array) for the corresponding three elements + (nbL, nbEx, data) where nbL is the number of letters in the alphabet, nbEx is the number + of samples and data is the 2d data array + :Example: >>> from splearn.datasets.base import load_data_sample @@ -254,46 +148,20 @@ class DataSample(dict): 5000 >>> data.data - - Input: - - :param string adr: adresse and name of the loaden file - :param string type: (default value = 'SPiCe') indicate - the structure of the file - :param lrows: number or list of rows, - a list of strings if partial=True; - otherwise, based on self.pref if version="classic" or - "prefix", self.fact otherwise - :type lrows: int or list of int - :param lcolumns: number or list of columns - a list of strings if partial=True ; - otherwise, based on self.suff if version="classic" or "suffix", - self.fact otherwise - :type lcolumns: int or list of int - :param string version: (default = "classic") version name - :param boolean partial: (default value = False) build of partial - """ - def __init__(self, data=None, type='SPiCe', **kwargs): - - # Size of the alphabet - self._nbL = 0 - # Number of samples - self._nbEx = 0 + def __init__(self, data=None, **kwargs): # The dictionary that contains the sample - self._data = Splearn_array(np.zeros((0,0))) + self._data = SplearnArray(np.zeros((0,0))) if data is not None: - self.nbL = data[0] - self.nbEx = data[1] - self.data = Splearn_array(data[2], nbL=data[0], nbEx=data[1]) - + self.data = SplearnArray(data[2], nbL=data[0], nbEx=data[1]) super(DataSample, self).__init__(kwargs) @property def nbL(self): """Number of letters""" - return self._nbL + return self.data.nbL @nbL.setter def nbL(self, nbL): @@ -302,13 +170,12 @@ class DataSample(dict): if nbL < 0: raise ValueError("The size of the alphabet should " + "an integer >= 0") - self._nbL = nbL + self.data.nbL = nbL @property def nbEx(self): """Number 
of examples""" - - return self._nbEx + return self.data.nbEx @nbEx.setter def nbEx(self, nbEx): @@ -317,21 +184,17 @@ if nbEx < 0: raise ValueError("The number of examples should be " + " an integer >= 0") - self._nbEx = nbEx + self.data.nbEx = nbEx @property def data(self): - """Splearn_array""" - + """SplearnArray""" return self._data @data.setter def data(self, data): - if isinstance(data, (Splearn_array, np.ndarray, np.generic)): + if isinstance(data, (SplearnArray, np.ndarray, np.generic)): self._data = data else: - raise TypeError("sample should be a Splearn_array.") - - - + raise TypeError("sample should be a SplearnArray.") diff --git a/splearn/hankel.py b/splearn/hankel.py index 6e629a265785726d476e2749a40a55088704ddf7..627203eb19c9116da30ee6f903faec3f7a8fe87c 100644 --- a/splearn/hankel.py +++ b/splearn/hankel.py @@ -44,21 +44,9 @@ import numpy as np class Hankel(object): """ A Hankel instance , compute the list of Hankel matrices - :Example: - - >>> from splearn import Learning, Hankel , Spectral - >>> train_file = '0.spice.train' - >>> pT = load_data_sample(adr=train_file) - >>> sp = Spectral() - >>> sp.fit(X=pT.data) - >>> lhankel = Hankel( sample_instance=pT.sample, - >>> nbL=pT.nbL, nbEx=pT.nbEx, - >>> lrows=6, lcolumns=6, version="classic", - >>> partial=True, sparse=True, mode_quiet=True).lhankel - - Input: - :param Splearn_array sample_instance: instance of Splearn_array + :param SplearnArray sample_instance: instance of SplearnArray :param lrows: number or list of rows, a list of strings if partial=True; otherwise, based on self.pref if version="classic" or @@ -79,6 +67,19 @@ class Hankel(object): *sample_instance* or *lhankel* has to be not None. If *sample_instance* is given, the **Hankel** instance is built directly from the sample dictionnary, else it is deduced from the *lhankels* list of matrices. + + :Example: + + >>> from splearn import Learning, Hankel , Spectral + >>> train_file = '0.spice.train' + >>> pT = load_data_sample(adr=train_file) + >>> sp = Spectral() + >>> sp.fit(X=pT.data) + >>> lhankel = Hankel( sample_instance=pT.sample, + >>> nbL=pT.nbL, nbEx=pT.nbEx, + >>> lrows=6, lcolumns=6, version="classic", + >>> partial=True, sparse=True, mode_quiet=True).lhankel + """ def __init__( @@ -177,6 +178,8 @@ class Hankel(object): @property def build_from_sample(self): + """Boolean that indicates whether the matrices have been built from the sample or not + (in the latter case they are built directly from an Automaton) """ return self._build_from_sample @build_from_sample.setter @@ -193,10 +196,10 @@ class Hankel(object): - Input: - :param dict sample: sample dictionary - :param dict pref: prefix dictionary - :param dict suff: suffix dictionary - :param dict fact: factor dictionary + :param dict sample: the keys are the words and the values are the number of times each word appears in the sample. + :param dict pref: the keys are the prefixes and the values are the number of times each prefix appears in the sample. + :param dict suff: the keys are the suffixes and the values are the number of times each suffix appears in the sample. + :param dict fact: the keys are the factors and the values are the number of times each factor appears in the sample.
:param lrows: number or list of rows, a list of strings if partial=True; otherwise, based on self.pref if version="classic" or diff --git a/splearn/spectral.py b/splearn/spectral.py index 1bc76ee2e0f8d951992035514cf4a3009d2e5e85..bf56fa6ea2c8c894530634e1fb9d404477f9f45c 100644 --- a/splearn/spectral.py +++ b/splearn/spectral.py @@ -41,7 +41,10 @@ from __future__ import division, print_function import numpy as np import math -from splearn.datasets.data_sample import Splearn_array +import threading +lock = threading.Lock() + +from splearn.datasets.data_sample import SplearnArray from splearn.hankel import Hankel from sklearn.base import BaseEstimator from sklearn.utils import check_array @@ -51,29 +54,6 @@ import warnings class Spectral(BaseEstimator): """A Spectral estimator instance - :Example: - - >>> from splearn.spectral import Spectral - >>> sp = Spectral() - >>> sp.set_params(partial=True, lcolumns=6, lrows=6, smooth_method='trigram') - Spectral(lcolumns=6, lrows=6, mode_quiet=False, partial=True, rank=5, - smooth_method='trigram', sparse=True, version='classic') - >>> sp.fit(data.data) - Start Hankel matrix computation - End of Hankel matrix computation - Start Building Automaton from Hankel matrix - End of Automaton computation - Spectral(lcolumns=6, lrows=6, partial=True, rank=5, smooth_method='trigram', sparse=True, version='classic') - >>> sp.automaton.initial - array([-0.00049249, 0.00304676, -0.04405996, -0.10765322, -0.08660063]) - >>> sp.predict(data.data) - array([ 4.38961058e-04, 1.10616861e-01, 1.35569353e-03, ..., - 4.66041996e-06, 4.68177275e-02, 5.24287604e-20]) - >>> sp.loss(data.data, normalize=True) - -10.530029936056017 - >>> sp.score(data.data) - 10.530029936056017 - - Input: :param int rank: the ranking number @@ -106,6 +86,28 @@ class Spectral(BaseEstimator): :param boolean mode_quiet: (default value = False) True for no output message. 
+ :Example: + + >>> from splearn.spectral import Spectral + >>> sp = Spectral() + >>> sp.set_params(partial=True, lcolumns=6, lrows=6, smooth_method='trigram') + Spectral(lcolumns=6, lrows=6, mode_quiet=False, partial=True, rank=5, + smooth_method='trigram', sparse=True, version='classic') + >>> sp.fit(data.data) + Start Hankel matrix computation + End of Hankel matrix computation + Start Building Automaton from Hankel matrix + End of Automaton computation + Spectral(lcolumns=6, lrows=6, partial=True, rank=5, smooth_method='trigram', sparse=True, version='classic') + >>> sp.automaton.initial + array([-0.00049249, 0.00304676, -0.04405996, -0.10765322, -0.08660063]) + >>> sp.predict(data.data) + array([ 4.38961058e-04, 1.10616861e-01, 1.35569353e-03, ..., + 4.66041996e-06, 4.68177275e-02, 5.24287604e-20]) + >>> sp.loss(data.data, normalize=True) + -10.530029936056017 + >>> sp.score(data.data) + 10.530029936056017 """ def __init__(self, rank=5, lrows=7, lcolumns=7, @@ -172,8 +174,7 @@ class Spectral(BaseEstimator): self.smooth = 0 def set_params(self, **parameters): - """ - set the values of Spectral estimator parameters + """set the values of Spectral estimator parameters - Output: @@ -186,12 +187,12 @@ class Spectral(BaseEstimator): self._rule_smooth_method(value) return self - def fit(self, X, y=None): #, gram + def fit(self, X, y=None): """Fit the model - Input: - :param Splearn_array X: object of shape [n_samples,n_features] + :param SplearnArray X: object of shape [n_samples,n_features] Training data :param ndarray y: (default value = None) not used by Spectral estimator numpy array of shape [n_samples] Target values @@ -206,11 +207,11 @@ class Spectral(BaseEstimator): """ check_array(X) - if not isinstance(X, Splearn_array): + if not isinstance(X, SplearnArray): self._hankel = None self._automaton = None return self - X = self._polulate_dictionnaries(X) + X = self.polulate_dictionnaries(X) self._hankel = Hankel(sample_instance=X, lrows=self.lrows, lcolumns=self.lcolumns, version=self.version, @@ -232,8 +233,108 @@ class Spectral(BaseEstimator): dsample[w] = dsample[w] + 1 if w in dsample else 1 return dsample - def _polulate_dictionnaries(self, X): - if not isinstance(X, Splearn_array): +# def _populate_new_word(self, X, i, lrowsmax=None, version_rows_int=None, +# lcolumnsmax=None, version_columns_int=None, lmax=None): +# w = X[i, :] +# w = w[w >= 0] +# w = tuple([int(x) for x in w[0:]]) +# with lock: +# X.sample[w] = X.sample.setdefault(w, 0) + 1 +# if self.version == "prefix" or self.version == "classic": +# # empty word treatment for prefixe, suffix, and factor dictionnaries +# with lock: +# X.pref[()] = X.pref[()] + 1 if () in X.pref else 1 +# if self.version == "suffix" or self.version == "classic": +# with lock: +# X.suff[()] = X.suff[()] + 1 if () in X.suff else 1 +# if self.version == "factor" or self.version == "suffix" \ +# or self.version == "prefix": +# with lock: +# X.fact[()] = X.fact[()] + len(w) + 1 if () in X.fact else len(w) + 1 +# +# if self.partial: +# for i in range(len(w)): +# if self.version == "classic": +# if (version_rows_int is True and +# i + 1 <= lrowsmax) or \ +# (version_rows_int is False and +# w[:i + 1] in self.lrows): +# with lock: +# X.pref[w[:i + 1]] = \ +# X.pref[w[:i + 1]] + 1 if w[:i + 1] in X.pref else 1 +# if (version_columns_int is True and i + 1 <= lcolumnsmax) or \ +# (version_columns_int is False and w[-( i + 1):] in self.lcolumns): +# with lock: +# X.suff[w[-(i + 1):]] = X.suff[w[-(i + 1):]] + 1 if \ +# w[-(i + 1):] in X.suff else 1 +# if 
self.version == "prefix": +# # dictionaries dpref is populated until +# # lmax = lrows + lcolumns +# # dictionaries dfact is populated until lcolumns +# if ((version_rows_int is True or +# version_columns_int is True) and +# i + 1 <= lmax) or \ +# (version_rows_int is False and +# (w[:i + 1] in self.lrows)) or \ +# (version_columns_int is False and +# (w[:i + 1] in self.lcolumns)): +# X.pref[w[:i + 1]] = X.pref[w[:i + 1]] + 1 \ +# if w[:i + 1] in X.pref else 1 +# for j in range(i + 1, len(w) + 1): +# if (version_columns_int is True and ( +# j - i) <= lmax) or \ +# (version_columns_int is False and +# (w[i:j] in self.lcolumns)): +# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \ +# if w[i:j] in X.fact else 1 +# if self.version == "suffix": +# if ((version_rows_int is True or +# version_columns_int is True) and +# i <= lmax) or \ +# (version_rows_int is False and +# (w[-(i + 1):] in self.lrows)) or \ +# (version_columns_int is False and +# (w[-(i + 1):] in self.lcolumns)): +# X.suff[w[-(i + 1):]] = X.suff[w[-(i + 1):]] + 1 \ +# if w[-(i + 1):] in X.suff else 1 +# for j in range(i + 1, len(w) + 1): +# if (version_rows_int is True and ( +# j - i) <= lmax) or \ +# (version_rows_int is False and +# (w[i:j] in self.lrows)): +# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \ +# if w[i:j] in X.fact else 1 +# if self.version == "factor": +# for j in range(i + 1, len(w) + 1): +# if ((version_rows_int is True or +# version_columns_int is True) and +# (j - i) <= lmax) or \ +# (version_rows_int is False and +# (w[i:j] in self.lrows)) or \ +# (version_columns_int is False and +# (w[i:j] in self.lcolumns)): +# X.fact[w[i:j]] = \ +# X.fact[w[i:j]] + 1 if w[i:j] in X.fact else 1 +# +# else: # not partial +# for i in range(len(w)): +# X.pref[w[:i + 1]] = X.pref[w[:i + 1]] + 1 \ +# if w[:i + 1] in X.pref else 1 +# X.suff[w[i:]] = X.suff[w[i:]] + 1 if w[i:] in X.suff else 1 +# for j in range(i + 1, len(w) + 1): +# X.fact[w[i:j]] = X.fact[w[i:j]] + 1 \ +# if w[i:j] in X.fact else 1 + def polulate_dictionnaries(self, X): + """Populates the *sample*, *pref*, *suff*, *fact* dictionaries of X + + - Input: + + :param SplearnArray X: object of shape [n_samples,n_features] + Training data + + """ + if not isinstance(X, SplearnArray): return X dsample = {} # dictionary (word,count) dpref = {} # dictionary (prefix,count) @@ -459,7 +560,7 @@ - Input: - :param Splearn_array X : of shape data shape = (n_samples, n_features) + :param SplearnArray X : of shape data shape = (n_samples, n_features) Samples. @@ -489,7 +590,7 @@ - Input: - :param Splearn_array X : Samples, data shape = (n_samples, n_features) + :param SplearnArray X : Samples, data shape = (n_samples, n_features) - Output: @@ -537,18 +638,17 @@ return Y def loss(self, X, y=None, normalize=True): - """ - Log probability using the Spectral model + """Log probability using the Spectral model - Input: - :param Splearn_array X : of shape data shape = (n_samples, n_features) + :param SplearnArray X: of shape data shape = (n_samples, n_features) Samples. X is validation data. 
- :param ndarray y : (default value = Null) + :param ndarray y: (default value = Null) numpy array of shape [n_samples] Target values, is the ground truth target for X (in the supervised case) or None (in the unsupervised case) - :param boolean normalize (default value = True) calculation are + :param boolean normalize: (default value = True) calculation are performed and normalize by the number of sample in case of True - Output: @@ -584,7 +684,7 @@ class Spectral(BaseEstimator): - Input: - :param Splearn_array X: of shape data shape = (n_samples, n_features) + :param SplearnArray X: of shape data shape = (n_samples, n_features) Samples. :param ndarray y: (default value = None) numpy array of shape [n_samples] Target values, diff --git a/splearn/tests/test_data_sample.py b/splearn/tests/test_data_sample.py index 1eae3ba20ab49018b0ed668c0593104487bcba93..affb871f03f7f1a339efb30c432b55fd7abea1cc 100644 --- a/splearn/tests/test_data_sample.py +++ b/splearn/tests/test_data_sample.py @@ -38,7 +38,7 @@ from __future__ import division, print_function import numpy as np import unittest from splearn.datasets.base import load_data_sample -from splearn.datasets.data_sample import DataSample, Splearn_array +from splearn.datasets.data_sample import DataSample, SplearnArray from splearn.tests.datasets.get_dataset_path import get_dataset_path from splearn.spectral import Spectral @@ -68,7 +68,7 @@ class UnitaryTest(unittest.TestCase): s = load_data_sample(adr=adr) cl = Spectral() - cl._polulate_dictionnaries(s.data) + cl.polulate_dictionnaries(s.data) self.assertEqual(s.nbL,s.data.nbL) self.assertEqual(s.nbEx, s.data.nbEx) with self.assertRaises(TypeError): @@ -88,7 +88,7 @@ class UnitaryTest(unittest.TestCase): data = load_data_sample(adr=adr) cl = Spectral(partial=False) - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) nbL = data.data.nbL nbEx = data.data.nbEx sample = data.data .sample @@ -107,7 +107,7 @@ class UnitaryTest(unittest.TestCase): self.assertEqual(nbSuff1, nbSuff2) cl = Spectral(version = 'factor', partial=False) - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) fact = data.data.fact nbFact1 = sum([sample[w]*(len(w)+1)*(len(w)+2)/2 for w in sample]) nbFact2 = sum([fact[w] for w in fact]) @@ -117,7 +117,7 @@ class UnitaryTest(unittest.TestCase): adr = get_dataset_path("0.spice.train") pT = load_data_sample(adr=adr) cl = Spectral(partial=False) - cl._polulate_dictionnaries(pT.data) + cl.polulate_dictionnaries(pT.data) # lR = pT.data.select_rows(nb_rows_max = 10, version = 'classic') # lC = pT.data.select_columns(nb_columns_max = 10, version = 'classic') # self.assertEqual(lR, [(), (3,), (3, 0), (3, 3), (3, 0, 3), (3, 1), @@ -127,7 +127,7 @@ class UnitaryTest(unittest.TestCase): # (1,), (1, 3), (3, 0, 3)]) cl = Spectral(version = 'prefix', partial=False) - cl._polulate_dictionnaries(pT.data) + cl.polulate_dictionnaries(pT.data) # lRp = pT.data.select_rows(nb_rows_max = 10, version = 'prefix') # lCp = pT.data.select_columns(nb_columns_max = 10, version = 'prefix') # self.assertEqual(lRp, [(), (3,), (3, 0), (3, 0, 0), (3, 0, 1), @@ -137,7 +137,7 @@ class UnitaryTest(unittest.TestCase): # (0, 3), (1, 3), (3, 1)]) cl = Spectral(version = 'factor', partial=False) - cl._polulate_dictionnaries(pT.data) + cl.polulate_dictionnaries(pT.data) # lRf = pT.data.select_rows(nb_rows_max = 10, version = 'factor') # lCf = pT.data.select_columns(nb_columns_max = 10, version = 'factor') # self.assertEqual(lRf, [(), (3,), (0,), (1,), (3, 0), (3, 3), 
(2,), diff --git a/splearn/tests/test_hankel.py b/splearn/tests/test_hankel.py index 2f2a4bc1cb6d43aa8faf681db67347f50ab6eded..a1cb6c27bf1aab24b19e1c09f7bbe8420f86112d 100644 --- a/splearn/tests/test_hankel.py +++ b/splearn/tests/test_hankel.py @@ -53,7 +53,7 @@ class HankelTest(unittest.TestCase): # adr = get_dataset_path("3.pautomac.train") data = load_data_sample(adr=adr) cl = Spectral(partial=False) - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) lprefix = [()] lprefix = lprefix + [(i,) for i in range(data.data.nbL)] lprefix = lprefix+[(i, j) for i in range(data.data.nbL) @@ -123,7 +123,7 @@ class HankelTest(unittest.TestCase): # adr = get_dataset_path("3.pautomac.train") data = load_data_sample(adr=adr) cl = Spectral(partial=False, version="prefix") - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) lprefix = [()] lprefix = lprefix + [(i,) for i in range(data.data.nbL)] lprefix = lprefix + [(i, j) for i in range(data.data.nbL) @@ -196,7 +196,7 @@ class HankelTest(unittest.TestCase): # adr = get_dataset_path("3.pautomac.train") data = load_data_sample(adr=adr) cl = Spectral(partial=False, version="suffix") - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) lprefix = [()] lprefix = lprefix + [(i,) for i in range(data.data.nbL)] lprefix = lprefix + [(i, j) for i in range(data.data.nbL) @@ -266,7 +266,7 @@ class HankelTest(unittest.TestCase): # adr = get_dataset_path("3.pautomac.train") data = load_data_sample(adr=adr) cl = Spectral(partial=False, version="factor") - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) lprefix = [()] lprefix = lprefix + [(i,) for i in range(data.data.nbL)] lprefix = lprefix + [(i, j) for i in range(data.data.nbL) @@ -336,7 +336,7 @@ class HankelTest(unittest.TestCase): # adr = get_dataset_path("3.pautomac.train") data = load_data_sample(adr=adr) cl = Spectral() - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) h = Hankel(sample_instance=data.data, lrows=1, lcolumns=1, version="classic", partial=False, sparse=False) with self.assertRaises(TypeError): @@ -349,7 +349,7 @@ adr = get_dataset_path("essai") data = load_data_sample(adr=adr) cl = Spectral() - cl._polulate_dictionnaries(data.data) + cl.polulate_dictionnaries(data.data) h1 = Hankel(sample_instance=data.data, lrows=1, lcolumns=1, version="classic", partial=False, sparse=False) h2 = Hankel(sample_instance=data.data, lrows=1, lcolumns=1, diff --git a/splearn/tests/test_spectral.py b/splearn/tests/test_spectral.py index 3ab9abb16289316d96c1c581c415ec19cb70b5d3..314aa17b4f0724699e488314f9c2d4d246a9bc94 100644 --- a/splearn/tests/test_spectral.py +++ b/splearn/tests/test_spectral.py @@ -41,9 +41,9 @@ from splearn.datasets.base import load_data_sample from splearn.automaton import Automaton from splearn.spectral import Spectral from splearn.tests.datasets.get_dataset_path import get_dataset_path -class UnitaryTest(unittest.TestCase): +class SpectralTest(unittest.TestCase): def test_version(self): adr = get_dataset_path("essai") @@ -238,6 +238,25 @@ class UnitaryTest(unittest.TestCase): np.testing.assert_almost_equal(A.val([0, 1, 0, 1, 1]), B.val([0, 1, 0, 1, 1])) + def test_sklearn_compatibility(self): + from sklearn.utils.estimator_checks import check_estimator + from sklearn.model_selection import train_test_split, cross_val_score + 
check_estimator(Spectral) + adr = get_dataset_path("3.pautomac_light.train") + data = load_data_sample(adr=adr) + sp = Spectral(lrows=6, lcolumns=6, rank = 5, sparse=False, + partial=True, smooth_method='trigram') + X_train, X_test = train_test_split(data.data, test_size=0.4, random_state=0) + sp.fit(X_train) + single_predicted_weights = sp.predict(X_test) + print(single_predicted_weights) + self.assertAlmostEqual(single_predicted_weights[0], 6.76217667e-02, delta = 1e-5) + scores = cross_val_score(sp, data.data, cv=4) + print(scores) + scores_expected = [-10.65272755, -10.7090267, -10.78404758, -11.08453211] + for s1, s2 in zip(scores, scores_expected): + self.assertAlmostEqual(s1, s2, delta=0.1) + # def test_Perplexity(self): # adr = get_dataset_path("3.pautomac") # P = Learning()
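
Reviewer note: for readers unfamiliar with the word encoding documented in the new `SplearnArray` docstring above (one word per row over an integer alphabet, 0->a, 1->b, ..., padded with -1), here is a minimal standalone sketch. It mirrors the `w = w[w >= 0]` padding-strip used in the commented-out `_populate_new_word` in this patch; the `decode_rows` helper is illustrative only and not part of splearn.

```python
import numpy as np

# Rows follow the SplearnArray convention described in the docstring:
# each row is one word over an integer alphabet (0 -> 'a', 1 -> 'b', ...),
# padded with -1 after the word ends. Same toy data as the docstring table.
X = np.array([[ 0,  1,  0,  3, -1],
              [ 0,  0,  3,  3,  1],
              [ 1,  1, -1, -1, -1],
              [ 5, -1, -1, -1, -1],
              [-1, -1, -1, -1, -1]])

def decode_rows(data):
    """Hypothetical helper: strip the -1 padding and return each row as a tuple word."""
    words = []
    for row in data:
        row = row[row >= 0]                  # drop end-of-word padding, as in w = w[w >= 0]
        words.append(tuple(int(x) for x in row))
    return words

print(decode_rows(X))
# [(0, 1, 0, 3), (0, 0, 3, 3, 1), (1, 1), (5,), ()]
# i.e. abad, aaddb, bb, f and the empty word -- nbEx = 5 examples in total.
```

The same tuples are the keys of the `sample`, `pref`, `suff` and `fact` dictionaries that `polulate_dictionnaries` fills in before the Hankel matrices are built.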