From 3d0e3ae5a46dd9d8d7d7fcde97d01f92dd24c4c9 Mon Sep 17 00:00:00 2001
From: Denis Arrivault <denis.arrivault@lif.univ-mrs.fr>
Date: Tue, 27 Feb 2018 18:32:18 +0100
Subject: [PATCH] GPU tests for svd in progress

---
 examples/PythonOptimizations.ipynb   | 394 +++++++++++++++++++++++++++
 examples/performances_calculation.py |  46 ++--
 splearn/hankel.py                    |  18 +-
 splearn/spectral.py                  | 232 +---------------
 4 files changed, 447 insertions(+), 243 deletions(-)
 create mode 100644 examples/PythonOptimizations.ipynb

diff --git a/examples/PythonOptimizations.ipynb b/examples/PythonOptimizations.ipynb
new file mode 100644
index 0000000..386c4d3
--- /dev/null
+++ b/examples/PythonOptimizations.ipynb
@@ -0,0 +1,394 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from timeit import default_timer as timer\n",
+    "import random\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "*** With setdefault ***\n",
+      "Mean time = 2.9573e-07 +/- 5.1021e-07 sigma\n",
+      "{33: 969, 23: 1030, 10: 1002, 54: 990, 37: 1055, 28: 983, 34: 964, 25: 1023, 55: 1016, 46: 958, 17: 979, 0: 1003, 98: 1025, 19: 977, 66: 1002, 73: 979, 42: 1038, 52: 986, 80: 1007, 45: 1044, 62: 942, 40: 1001, 77: 996, 26: 995, 11: 971, 97: 985, 59: 1024, 63: 939, 43: 1036, 88: 977, 47: 967, 44: 964, 93: 967, 51: 962, 69: 1003, 12: 988, 81: 1008, 82: 962, 61: 1003, 30: 985, 3: 1017, 79: 975, 29: 979, 35: 989, 31: 1019, 72: 1017, 78: 977, 100: 977, 7: 947, 91: 973, 90: 966, 38: 960, 20: 927, 8: 983, 75: 999, 92: 1020, 99: 1030, 64: 976, 57: 1030, 86: 1041, 16: 941, 39: 1017, 94: 996, 18: 940, 13: 1024, 27: 990, 53: 1042, 68: 952, 2: 1031, 85: 984, 21: 1009, 36: 1009, 71: 990, 84: 1022, 49: 1010, 60: 990, 70: 967, 9: 949, 67: 989, 87: 980, 74: 970, 32: 983, 89: 999, 96: 998, 50: 991, 56: 1010, 76: 1001, 95: 964, 24: 1004, 14: 950, 83: 1053, 5: 988, 22: 1043, 65: 980, 6: 967, 15: 966, 41: 922, 1: 960, 48: 958, 58: 955, 4: 999}\n",
+      "*** With if syntax ***\n",
+      "Mean time = 2.9544e-07 +/- 5.2105e-07 sigma\n",
+      "{33: 969, 23: 1030, 10: 1002, 54: 990, 37: 1055, 28: 983, 34: 964, 25: 1023, 55: 1016, 46: 958, 17: 979, 0: 1003, 98: 1025, 19: 977, 66: 1002, 73: 979, 42: 1038, 52: 986, 80: 1007, 45: 1044, 62: 942, 40: 1001, 77: 996, 26: 995, 11: 971, 97: 985, 59: 1024, 63: 939, 43: 1036, 88: 977, 47: 967, 44: 964, 93: 967, 51: 962, 69: 1003, 12: 988, 81: 1008, 82: 962, 61: 1003, 30: 985, 3: 1017, 79: 975, 29: 979, 35: 989, 31: 1019, 72: 1017, 78: 977, 100: 977, 7: 947, 91: 973, 90: 966, 38: 960, 20: 927, 8: 983, 75: 999, 92: 1020, 99: 1030, 64: 976, 57: 1030, 86: 1041, 16: 941, 39: 1017, 94: 996, 18: 940, 13: 1024, 27: 990, 53: 1042, 68: 952, 2: 1031, 85: 984, 21: 1009, 36: 1009, 71: 990, 84: 1022, 49: 1010, 60: 990, 70: 967, 9: 949, 67: 989, 87: 980, 74: 970, 32: 983, 89: 999, 96: 998, 50: 991, 56: 1010, 76: 1001, 95: 964, 24: 1004, 14: 950, 83: 1053, 5: 988, 22: 1043, 65: 980, 6: 967, 15: 966, 41: 922, 1: 960, 48: 958, 58: 955, 4: 999}\n"
+     ]
+    }
+   ],
+   "source": [
+    "N = 100000\n",
+    "d1 = {}\n",
+    "d2 = {}\n",
+    "duration1 = np.zeros((N,), dtype=np.float32)\n",
+    "duration2 = np.zeros((N,), dtype=np.float32)\n",
+    "str = \"Mean time = {0:.4e} +/- {1:.4e} sigma\"\n",
+    "for i in range(N):\n",
+    "    k = random.randint(0,100)\n",
+    "    start = timer()\n",
+    "    d1[k] = d1.setdefault(k, 0) + 1\n",
+    "    duration1[i] = timer() - start\n",
+    "    start = timer()\n",
+    "    d2[k] = d2[k] + 1 if k in d2 else 1\n",
+    "    duration2[i] = timer() - start\n",
+    "\n",
+    "print(\"*** With setdefault ***\")\n",
+    "print(str.format(np.mean(duration1), np.std(duration1)))\n",
+    "print(d1)\n",
+    "print(\"*** With if syntax ***\")\n",
+    "print(str.format(np.mean(duration2), np.std(duration2)))\n",
+    "print(d2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "__global__ void multiply_them(float *dest, float *a, float *b)\n",
+      "{\n",
+      " const int i = threadIdx.x;\n",
+      " dest[i] = a[i] * b[i];\n",
+      "}\n",
+      "\n",
+      "[-2.90137202e-01  2.81293893e+00 -1.65957522e+00 -8.62088725e-02\n",
+      "  1.45804667e+00  7.99023032e-01  8.37303638e-01  2.03320041e-01\n",
+      " -1.78530657e+00 -9.88828689e-02  2.83610914e-02 -3.73530895e-01\n",
+      "  1.13282132e+00  3.41161788e-01  7.03967333e-01  2.51203799e+00\n",
+      " -1.10995814e-01 -2.47637033e-01  4.86121893e-01 -5.22908807e-01\n",
+      "  2.42040649e-01  3.50897670e-01 -2.33534321e-01 -5.71392417e-01\n",
+      "  2.94612437e-01  8.51076543e-01 -1.22819483e-01 -1.12457033e-02\n",
+      "  7.62222052e-01 -5.62664986e-01 -2.81204749e-02  2.15141201e+00\n",
+      " -2.52846658e-01 -4.59794961e-02 -2.25618288e-01 -2.16486081e-02\n",
+      "  1.80242336e+00  1.98668197e-01  1.38681874e-01 -8.27464104e-01\n",
+      "  2.84924650e+00  4.41990757e+00  7.45217443e-01  2.79382646e-01\n",
+      " -3.47226970e-02 -3.42106796e-03 -2.03399825e+00  1.97560683e-01\n",
+      "  1.39831769e+00 -8.06550503e-01 -1.33297205e+00  1.37708103e-02\n",
+      " -4.00247842e-01  1.86116919e-02  2.44872691e-03  3.97552028e-02\n",
+      " -3.61032963e+00 -5.08877039e-01  3.09200644e-01 -2.52947181e-01\n",
+      " -1.45006254e-01 -2.06694026e-02 -6.71102941e-01 -2.49164805e-01\n",
+      " -2.45076999e-01 -8.83047581e-02 -2.77056009e-01 -3.63603652e-01\n",
+      "  9.13712531e-02  1.50326788e-01 -1.89215928e-01  6.49247885e-01\n",
+      " -1.04680789e+00  4.51940441e+00 -4.10190582e-01 -1.03717482e+00\n",
+      " -2.83955693e-01  2.13240623e-01 -4.06043559e-01  1.64782095e+00\n",
+      "  3.88930738e-02  1.79433972e-01 -1.63999215e-01  8.21388125e-01\n",
+      "  3.96741897e-01 -1.78203881e+00 -7.89099187e-02 -2.08030403e-01\n",
+      " -1.67499959e+00 -6.03813231e-02  9.04122651e-01  2.57910769e-02\n",
+      " -2.13610873e-01  1.05494165e+00  1.07709356e-02 -4.04108614e-02\n",
+      " -1.21986246e+00  3.52327973e-01  9.47596207e-02 -8.17048013e-01\n",
+      "  4.55845296e-01  1.14165060e-01 -8.36074054e-01 -2.73001790e+00\n",
+      " -2.25733072e-02  1.01752472e+00  1.58793926e-01 -3.07355216e-03\n",
+      "  3.39909703e-01  3.54566932e-01  4.76458110e-03 -2.74281472e-01\n",
+      " -4.90250558e-01 -5.93070447e-01  4.16583568e-01  1.40949559e+00\n",
+      " -6.85005546e-01  5.71568191e-01  2.63672054e-01 -2.65214413e-01\n",
+      " -1.82554388e+00 -2.56274521e-01 -2.20750004e-01  2.96699733e-01\n",
+      " -6.49191618e-01 -4.23787624e-01  1.40397280e-01  5.71154011e-03\n",
+      " -1.53047368e-01 -1.18402898e+00  8.88164192e-02 -7.80049741e-01\n",
+      "  1.22469738e-01  5.82156956e-01 -2.78263122e-01  5.70335388e+00\n",
+      "  2.33998346e+00  9.56273079e-01  1.51689303e+00 -4.10727739e-01\n",
+      "  4.94410247e-01 -3.66233498e-01  7.14528918e-01  2.38090664e-01\n",
+      " -6.46302760e-01 -3.08008194e-01 -3.49969387e-01  1.53435739e-02\n",
+      "  5.77413812e-02  3.72533381e-01  1.98260975e+00 -7.76899280e-03\n",
+      " -1.54382363e-01 -1.86449081e-01 -9.56342891e-02  1.03035802e-02\n",
+      "  1.14140980e-01  1.41981363e+00 -7.38705873e-01 -2.08745885e+00\n",
+      " -6.28826141e-01  1.02032393e-01 -7.70327747e-01  5.62456894e+00\n",
+      " -2.95871317e-01  1.26501453e+00  4.17780161e-01  8.68652642e-01\n",
+      " -4.87591237e-01  6.19750731e-02 -1.26040816e+00 -3.43194783e-01\n",
+      "  1.27273810e+00 -3.55367869e-01  2.32648347e-02  2.89628506e-01\n",
+      " -1.51091516e-01  2.83231330e-03 -1.51034407e-02  1.76298320e+00\n",
+      " -3.59570235e-01  2.01595259e+00 -7.87402838e-02  4.81955826e-01\n",
+      " -1.40068781e+00  2.83799553e-03  6.27174973e-01  3.14519048e-01\n",
+      " -1.10712957e+00 -3.53843659e-01 -6.35562837e-01  4.04969931e-01\n",
+      " -1.65314469e-02  4.08839643e-01  8.27084303e-01  3.12124312e-01\n",
+      " -4.19728532e-02 -4.32954282e-01  5.92173636e-01 -1.95017815e+00\n",
+      "  3.28963660e-02  6.78022718e-03 -1.16085088e+00  5.09182066e-02\n",
+      " -6.12896085e-01 -8.15577134e-02  1.12161350e+00 -3.40709865e-01\n",
+      "  2.52548099e-01 -4.65507209e-01  5.91823518e-01 -7.46792078e-01\n",
+      "  1.19965337e-01  6.05180681e-01 -5.09674013e-01  2.71683186e-01\n",
+      "  2.30722860e-01 -1.65039837e-01 -8.05439278e-02  1.71006903e-01\n",
+      " -2.81729937e-01  1.10326950e-02  3.20048153e-01  9.83216614e-03\n",
+      "  1.53846651e-01  1.16461468e+00 -4.88103539e-01 -2.46520080e-02\n",
+      " -7.18142092e-02 -1.67339086e-03 -2.42161244e-01  2.67475009e-01\n",
+      " -1.32814407e+00 -5.36984839e-02  1.15024351e-01 -1.35336888e+00\n",
+      " -4.01394248e-01  2.28310853e-01  2.85696723e-02 -9.75638449e-01\n",
+      "  1.79381263e+00  1.34870076e+00 -9.75736082e-01 -2.56683826e-01\n",
+      " -9.24404740e-01  1.18486941e+00 -1.60593867e-01  5.32781780e-01\n",
+      " -1.61111280e-01 -5.70979357e-01  2.48478189e-01 -5.70544958e-01\n",
+      " -3.36045325e-02 -1.70751661e-01 -7.83387959e-01 -3.28783333e-01\n",
+      "  6.68492764e-02 -4.55635548e-01  6.78718448e-01  1.61884940e+00\n",
+      "  1.40411711e+00  1.68428612e+00 -1.77249038e+00 -9.48907316e-01\n",
+      "  1.18341064e+00 -5.30112609e-02 -1.22148085e+00  5.62596917e-01\n",
+      "  1.31391573e+00  1.84780562e+00  5.45549512e-01  1.03463089e+00\n",
+      " -7.32520103e-01  3.38242576e-02 -6.29517257e-01  6.84408307e-01\n",
+      " -2.80194193e-01  4.18214053e-01 -2.41577148e+00 -2.93253827e+00\n",
+      "  9.50718462e-01  2.36972594e+00  4.81406689e-01  1.71889886e-02\n",
+      "  6.07256055e-01 -2.35717386e-01 -2.49700022e+00  6.08527958e-01\n",
+      " -7.02820182e-01 -1.22066826e-01  6.92340672e-01  9.00277853e-01\n",
+      " -1.30328491e-01 -4.41209674e-02 -9.21233058e-01  1.37919143e-01\n",
+      "  3.67532313e-01  3.77332382e-02  2.87247807e-01  2.75621843e+00\n",
+      "  7.48364270e-01 -3.02746415e-01  1.44414037e-01  5.80033548e-02\n",
+      "  1.95701897e-01 -1.33819908e-01  1.95749756e-02  9.36418235e-01\n",
+      "  1.13439392e-02 -3.80111992e-01 -9.90576968e-02 -5.11335442e-03\n",
+      " -7.68416934e-03 -2.29034567e+00 -2.59990782e-01 -1.78645715e-01\n",
+      "  1.65659058e+00 -2.18677855e+00  1.03389049e+00  1.44531997e-02\n",
+      " -3.51684600e-01  4.83583333e-03  1.80972859e-01  2.64298506e-02\n",
+      " -2.43892953e-01 -2.05088049e-01 -3.25786471e-01  6.42316818e-01\n",
+      "  1.44556820e-01  9.11835134e-02  1.94504070e+00 -5.24064781e-05\n",
+      " -2.08403915e-01 -1.54737011e-01 -1.55257428e+00  7.24277273e-02\n",
+      " -8.63651395e-01  1.59108377e+00  5.03697515e-01 -3.29254329e-01\n",
+      " -8.90930295e-02  1.14660770e-01 -9.05855417e-01  8.15455243e-03\n",
+      "  1.48153394e-01 -2.29722723e-01 -8.32916439e-01 -6.75686240e-01\n",
+      "  6.29307210e-01 -5.49214065e-01 -8.63798857e-02 -2.25813970e-01\n",
+      "  1.34750354e+00 -1.27279890e+00  1.71834212e-02 -9.28580642e-01\n",
+      " -4.82821971e-01 -3.34665596e-01 -5.94868183e-01 -1.29932821e-01\n",
+      " -1.92791373e-02 -1.49232015e-01  1.34774446e+00  1.19024850e-01\n",
+      " -1.20160818e-01 -1.26262501e-01  1.97318313e-03  1.36541653e+00\n",
+      "  1.02985278e-01 -2.10293263e-01  2.37391043e+00  2.78081983e-01\n",
+      "  1.06245232e+00 -2.89069340e-02 -2.64283836e-01 -3.76625746e-01\n",
+      "  2.17229322e-01  4.31653678e-01  3.62828434e-01  5.05672574e-01\n",
+      "  7.73309097e-02  2.52127171e+00 -5.13727009e-01 -5.77548921e-01\n",
+      " -1.68320909e-01  4.10304099e-01  2.59571910e-01  2.76594549e-01\n",
+      "  1.45417917e+00  4.71246615e-02 -9.45373952e-01 -2.64028404e-02\n",
+      " -1.45785689e+00 -2.15390992e+00  2.60196701e-02 -1.00632882e+00\n",
+      " -2.80741006e-01 -1.05663754e-01 -1.99056208e-01  1.07909453e+00]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pycuda.autoinit\n",
+    "import pycuda.driver as drv\n",
+    "import numpy\n",
+    "from pycuda.compiler import SourceModule\n",
+    "\n",
+    "module = \"\"\"\n",
+    "__global__ void multiply_them(float *dest, float *a, float *b)\n",
+    "{\n",
+    " const int i = threadIdx.x;\n",
+    " dest[i] = a[i] * b[i];\n",
+    "}\n",
+    "\"\"\"\n",
+    "print(module)\n",
+    "\n",
+    "mod = SourceModule(module, nvcc=\"nvcc\", options=[\"-ccbin=/usr/bin/clang-3.8\"])\n",
+    "\n",
+    "multiply_them = mod.get_function(\"multiply_them\")\n",
+    "\n",
+    "a = numpy.random.randn(400).astype(numpy.float32)\n",
+    "b = numpy.random.randn(400).astype(numpy.float32)\n",
+    "\n",
+    "dest = numpy.zeros_like(a)\n",
+    "multiply_them(\n",
+    "        drv.Out(dest), drv.In(a), drv.In(b),\n",
+    "        block=(400,1,1), grid=(1,1))\n",
+    "\n",
+    "print (dest)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "CompileError",
+     "evalue": "nvcc preprocessing of /tmp/tmpn_14m8_h.cu failed\n[command: nvcc --preprocess -arch sm_50 -I/home/arrivault/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/cuda /tmp/tmpn_14m8_h.cu --compiler-options -P]\n[stderr:\nb\"ERROR: No supported gcc/g++ host compiler found, but clang-3.8 is available.\\n       Use 'nvcc -ccbin clang-3.8' to use that instead.\\n\"]",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mCompileError\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-4-1815e70115ee>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpycuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurandom\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrand\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcurand\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0ma_gpu\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcurand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mb_gpu\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcurand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/curandom.py\u001b[0m in \u001b[0;36mrand\u001b[0;34m(shape, dtype, stream)\u001b[0m\n\u001b[1;32m    208\u001b[0m                 \u001b[0mdest\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mPOW_2_M32\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    209\u001b[0m             \"\"\",\n\u001b[0;32m--> 210\u001b[0;31m             \"md5_rng_float\")\n\u001b[0m\u001b[1;32m    211\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    212\u001b[0m         func = get_elwise_kernel(\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/elementwise.py\u001b[0m in \u001b[0;36mget_elwise_kernel\u001b[0;34m(arguments, operation, name, keep, options, **kwargs)\u001b[0m\n\u001b[1;32m    159\u001b[0m     \"\"\"\n\u001b[1;32m    160\u001b[0m     func, arguments = get_elwise_kernel_and_types(\n\u001b[0;32m--> 161\u001b[0;31m             arguments, operation, name, keep, options, **kwargs)\n\u001b[0m\u001b[1;32m    162\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    163\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/elementwise.py\u001b[0m in \u001b[0;36mget_elwise_kernel_and_types\u001b[0;34m(arguments, operation, name, keep, options, use_range, **kwargs)\u001b[0m\n\u001b[1;32m    145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    146\u001b[0m     mod = module_builder(arguments, operation, name,\n\u001b[0;32m--> 147\u001b[0;31m             keep, options, **kwargs)\n\u001b[0m\u001b[1;32m    148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    149\u001b[0m     \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/elementwise.py\u001b[0m in \u001b[0;36mget_elwise_module\u001b[0;34m(arguments, operation, name, keep, options, preamble, loop_prep, after_loop)\u001b[0m\n\u001b[1;32m     73\u001b[0m             \u001b[0;34m\"after_loop\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mafter_loop\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     74\u001b[0m             },\n\u001b[0;32m---> 75\u001b[0;31m         options=options, keep=keep)\n\u001b[0m\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/compiler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, source, nvcc, options, keep, no_extern_c, arch, code, cache_dir, include_dirs)\u001b[0m\n\u001b[1;32m    289\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    290\u001b[0m         cubin = compile(source, nvcc, options, keep, no_extern_c,\n\u001b[0;32m--> 291\u001b[0;31m                 arch, code, cache_dir, include_dirs)\n\u001b[0m\u001b[1;32m    292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    293\u001b[0m         \u001b[0;32mfrom\u001b[0m \u001b[0mpycuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdriver\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmodule_from_buffer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/compiler.py\u001b[0m in \u001b[0;36mcompile\u001b[0;34m(source, nvcc, options, keep, no_extern_c, arch, code, cache_dir, include_dirs, target)\u001b[0m\n\u001b[1;32m    253\u001b[0m         \u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"-I\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    254\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 255\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mcompile_plain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnvcc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    257\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mCudaModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/compiler.py\u001b[0m in \u001b[0;36mcompile_plain\u001b[0;34m(source, options, keep, nvcc, cache_dir, target)\u001b[0m\n\u001b[1;32m     76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     77\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;34m'#include'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m             \u001b[0mchecksum\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpreprocess_source\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnvcc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     79\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     80\u001b[0m             \u001b[0mchecksum\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/compiler.py\u001b[0m in \u001b[0;36mpreprocess_source\u001b[0;34m(source, options, nvcc)\u001b[0m\n\u001b[1;32m     53\u001b[0m         \u001b[0;32mfrom\u001b[0m \u001b[0mpycuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdriver\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCompileError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     54\u001b[0m         raise CompileError(\"nvcc preprocessing of %s failed\" % source_path,\n\u001b[0;32m---> 55\u001b[0;31m                            cmdline, stderr=stderr)\n\u001b[0m\u001b[1;32m     56\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     57\u001b[0m     \u001b[0;31m# sanity check\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mCompileError\u001b[0m: nvcc preprocessing of /tmp/tmpn_14m8_h.cu failed\n[command: nvcc --preprocess -arch sm_50 -I/home/arrivault/.virtualenvs/splearn-pycuda/lib/python3.6/site-packages/pycuda/cuda /tmp/tmpn_14m8_h.cu --compiler-options -P]\n[stderr:\nb\"ERROR: No supported gcc/g++ host compiler found, but clang-3.8 is available.\\n       Use 'nvcc -ccbin clang-3.8' to use that instead.\\n\"]"
+     ]
+    }
+   ],
+   "source": [
+    "import pycuda.gpuarray as gpuarray\n",
+    "import pycuda.driver as cuda\n",
+    "import pycuda.autoinit\n",
+    "import numpy\n",
+    "from pycuda.curandom import rand as curand\n",
+    "\n",
+    "a_gpu = curand((50,))\n",
+    "b_gpu = curand((50,))\n",
+    "\n",
+    "from pycuda.elementwise import ElementwiseKernel\n",
+    "lin_comb = ElementwiseKernel(\n",
+    "        \"float a, float *x, float b, float *y, float *z\",\n",
+    "        \"z[i] = a*x[i] + b*y[i]\",\n",
+    "        \"linear_combination\")\n",
+    "\n",
+    "c_gpu = gpuarray.empty_like(a_gpu)\n",
+    "lin_comb(5, a_gpu, 6, b_gpu, c_gpu)\n",
+    "\n",
+    "import numpy.linalg as la\n",
+    "assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from scipy.linalg import svd\n",
+    "import pycuda.autoinit\n",
+    "import pycuda.gpuarray as gpuarray\n",
+    "d = 50\n",
+    "A = np.asarray(np.random.randint(1, 10,(d, d)), dtype=np.float32)\n",
+    "a_gpu = gpuarray.to_gpu(A)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.19 ms ± 1.49 ms per loop (mean ± std. dev. of 7 runs, 3 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit -n3 [u, s, v] = svd(A)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3.76 ms ± 1.95 ms per loop (mean ± std. dev. of 7 runs, 3 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit -n3 [u, s, v] = np.linalg.svd(A)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "CUSOLVER_STATUS_INTERNAL_ERROR",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mCUSOLVER_STATUS_INTERNAL_ERROR\u001b[0m            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-9-3d7440e3c764>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timeit'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"-n3 u_gpu, s_gpu, vh_gpu = linalg.svd(a_gpu, 'S', 'S', 'cusolver')\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_line_magic\u001b[0;34m(self, magic_name, line, _stack_depth)\u001b[0m\n\u001b[1;32m   2093\u001b[0m                 \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'local_ns'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf_locals\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2094\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2095\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2096\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2097\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<decorator-gen-61>\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, line, cell, local_ns)\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    186\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m         \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magics/execution.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, line, cell, local_ns)\u001b[0m\n\u001b[1;32m   1100\u001b[0m                     \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1102\u001b[0;31m         \u001b[0mall_runs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1103\u001b[0m         \u001b[0mbest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_runs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1104\u001b[0m         \u001b[0mworst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_runs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/lib/python3.6/timeit.py\u001b[0m in \u001b[0;36mrepeat\u001b[0;34m(self, repeat, number)\u001b[0m\n\u001b[1;32m    204\u001b[0m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    205\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 206\u001b[0;31m             \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    207\u001b[0m             \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    208\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magics/execution.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, number)\u001b[0m\n\u001b[1;32m    158\u001b[0m         \u001b[0mgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    159\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m             \u001b[0mtiming\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    161\u001b[0m         \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mgcold\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<magic-timeit>\u001b[0m in \u001b[0;36minner\u001b[0;34m(_it, _timer)\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/build/scikit-cuda/skcuda/linalg.py\u001b[0m in \u001b[0;36msvd\u001b[0;34m(a_gpu, jobu, jobvt, lib)\u001b[0m\n\u001b[1;32m    467\u001b[0m              \u001b[0mldu\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvh_gpu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgpudata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldvt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    468\u001b[0m              \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWork\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgpudata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mLwork\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrwork\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 469\u001b[0;31m              int(devInfo.gpudata))\n\u001b[0m\u001b[1;32m    470\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    471\u001b[0m         \u001b[0;31m# Free working space:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/build/scikit-cuda/skcuda/cusolver.py\u001b[0m in \u001b[0;36mcusolverDnSgesvd\u001b[0;34m(handle, jobu, jobvt, m, n, a, lda, s, U, ldu, vt, ldvt, work, lwork, rwork, devInfo)\u001b[0m\n\u001b[1;32m    998\u001b[0m                                            \u001b[0mldu\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldvt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwork\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    999\u001b[0m                                            lwork, int(rwork), int(devInfo))\n\u001b[0;32m-> 1000\u001b[0;31m     \u001b[0mcusolverCheckStatus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1001\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1002\u001b[0m \u001b[0m_libcusolver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcusolverDnDgesvd_bufferSize\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrestype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/build/scikit-cuda/skcuda/cusolver.py\u001b[0m in \u001b[0;36mcusolverCheckStatus\u001b[0;34m(status)\u001b[0m\n\u001b[1;32m    160\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    161\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 162\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mCUSOLVER_EXCEPTIONS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    163\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    164\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mCUSOLVER_ERROR\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mCUSOLVER_STATUS_INTERNAL_ERROR\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "%timeit -n3 u_gpu, s_gpu, vh_gpu = linalg.svd(a_gpu, 'S', 'S', 'cusolver')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "CUSOLVERError",
+     "evalue": "CUSOLVER_STATUS_INTERNAL_ERROR",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mCUSOLVERError\u001b[0m                             Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-8-c960eb315819>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timeit'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'-n3 u_cp, s_cp, v_cp = cp.linalg.svd(a_cp)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_line_magic\u001b[0;34m(self, magic_name, line, _stack_depth)\u001b[0m\n\u001b[1;32m   2093\u001b[0m                 \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'local_ns'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf_locals\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2094\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2095\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2096\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2097\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<decorator-gen-61>\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, line, cell, local_ns)\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    186\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m         \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magics/execution.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, line, cell, local_ns)\u001b[0m\n\u001b[1;32m   1100\u001b[0m                     \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1102\u001b[0;31m         \u001b[0mall_runs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1103\u001b[0m         \u001b[0mbest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_runs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1104\u001b[0m         \u001b[0mworst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_runs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mnumber\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/lib/python3.6/timeit.py\u001b[0m in \u001b[0;36mrepeat\u001b[0;34m(self, repeat, number)\u001b[0m\n\u001b[1;32m    204\u001b[0m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    205\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 206\u001b[0;31m             \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    207\u001b[0m             \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    208\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/IPython/core/magics/execution.py\u001b[0m in \u001b[0;36mtimeit\u001b[0;34m(self, number)\u001b[0m\n\u001b[1;32m    158\u001b[0m         \u001b[0mgc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    159\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m             \u001b[0mtiming\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    161\u001b[0m         \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mgcold\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<magic-timeit>\u001b[0m in \u001b[0;36minner\u001b[0;34m(_it, _timer)\u001b[0m\n",
+      "\u001b[0;32m~/.virtualenvs/sksplearn/lib/python3.6/site-packages/cupy/linalg/decomposition.py\u001b[0m in \u001b[0;36msvd\u001b[0;34m(a, full_matrices, compute_uv)\u001b[0m\n\u001b[1;32m    254\u001b[0m             \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mptr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    255\u001b[0m             \u001b[0ms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mptr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu_ptr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvt_ptr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 256\u001b[0;31m             workspace.data.ptr, buffersize, 0, dev_info.data.ptr)\n\u001b[0m\u001b[1;32m    257\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# dtype == 'd'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    258\u001b[0m         \u001b[0mbuffersize\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcusolver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdgesvd_bufferSize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mcupy/cuda/cusolver.pyx\u001b[0m in \u001b[0;36mcupy.cuda.cusolver.sgesvd\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mcupy/cuda/cusolver.pyx\u001b[0m in \u001b[0;36mcupy.cuda.cusolver.sgesvd\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mcupy/cuda/cusolver.pyx\u001b[0m in \u001b[0;36mcupy.cuda.cusolver.check_status\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mCUSOLVERError\u001b[0m: CUSOLVER_STATUS_INTERNAL_ERROR"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit -n3 u_cp, s_cp, v_cp = cp.linalg.svd(a_cp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/performances_calculation.py b/examples/performances_calculation.py
index 9eb6d59..0c2987e 100644
--- a/examples/performances_calculation.py
+++ b/examples/performances_calculation.py
@@ -12,26 +12,38 @@ from splearn import Spectral
 from splearn.tests.datasets.get_dataset_path import get_dataset_path
 from splearn.datasets.base import load_data_sample
 
-def test():
-    adr = get_dataset_path("3.pautomac.train")
-    data = load_data_sample(adr=adr)
-    X = data.data
-    sp1 = Spectral()
+def launch(X, version='classic', partial=True, sparse=True, smooth_method='none'):
+    param = "****** {:s} - partial = {:b} - sparse = {:b}, {:s} *****".format(version, partial, sparse, smooth_method)
+    print(param)
+
+    sp1 = Spectral(version=version, partial=partial, sparse=sparse, smooth_method=smooth_method)
     start = timer()
     sp1 = sp1.fit(X)
     duration = timer() - start
-    print("Classic : " + str(duration))
-    
-    sp2 = Spectral()
-    start = timer()
-    sp2 = sp2.fit_opt(X)
-    duration = timer() - start
-    print("Opt : " + str(duration))
-    
-    if sp1.hankel == sp2.hankel:
-        print("Same result.")
-    else:
-        print("The result is different", file=sys.stderr)
+    print("Unoptimized : " + str(duration))
+ 
+#     sp2 = Spectral(version=version, partial=partial, sparse=sparse, smooth_method=smooth_method)
+#     start = timer()
+#     sp2 = sp2.fit(X)
+#     duration = timer() - start
+#     print("Optimized : " + str(duration))
+#     
+#     if sp1.hankel == sp2.hankel:
+#         print("Same result.")
+#     else:
+#         print("The result is different", file=sys.stderr)
+
+def test():
+    adr = get_dataset_path("3.pautomac_light.train")
+    data = load_data_sample(adr=adr)
+    X = data.data
+    param = {'version':['classic', 'prefix', 'suffix', 'factor'], 'partial':[True, False],
+             'sparse':[True, False], 'smooth_method':['none','trigram']}
+    for version in param['version']:
+#        for partial in param['partial']:
+            for sparse in param['sparse']:
+                for smooth_method in param['smooth_method']:
+                    launch(X, version=version, partial=True, sparse=sparse, smooth_method=smooth_method)
 
 if __name__ == '__main__':
     test()
diff --git a/splearn/hankel.py b/splearn/hankel.py
index 627203e..7318d05 100644
--- a/splearn/hankel.py
+++ b/splearn/hankel.py
@@ -115,33 +115,33 @@ class Hankel(object):
     def __eq__(self, other):
         #print("Hankel equality check")
         if self.version != other.version:
-            #print("version is different")
+        #    print("version is different")
             return False
         if self.partial != other.partial:
-            #print("partial is different")
+        #    print("partial is different")
             return False
         if self.sparse != other.sparse:
-            #print("sparse is different")
+        #    print("sparse is different")
             return False
         if self.build_from_sample != other.build_from_sample:
-            #print("build_from_sample is different")
+        #    print("build_from_sample is different")
             return False
         if self.nbL != other.nbL:
-            #print("nbL is different")
+        #    print("nbL is different")
             return False
         if self.nbEx != other.nbEx:
-            #print("nbEx is different")
+        #    print("nbEx is different")
             return False
         if len(self.lhankel) != len(other.lhankel):
-            #print("lhankel length is different")
+        #    print("lhankel length is different")
             return False
         for lh1, lh2 in zip(self.lhankel, other.lhankel):
             if self.sparse:
                 if (lh1 != lh2).nnz > 0:
-                    #print("{:d} elements oh lhandel are different".format((lh1 != lh2).nnz))
+                #    print("{:d} elements oh lhandel are different".format((lh1 != lh2).nnz))
                     return False
             elif not np.array_equal(lh1, lh2):
-                #print("Different Array")
+            #    print("Different Array")
                 return False
         return True
 
diff --git a/splearn/spectral.py b/splearn/spectral.py
index 3ff0e4e..e2b7341 100644
--- a/splearn/spectral.py
+++ b/splearn/spectral.py
@@ -222,7 +222,7 @@ class Spectral(BaseEstimator):
                          mode_quiet=self.mode_quiet)
         self._automaton = self._hankel.to_automaton(self.rank, self.mode_quiet)
         # for smooth option compute trigram dictionnary
-        if self.smooth == 1:
+        if self.smooth:
             self.trigram = self._threegramdict(X.sample)
 
         return self
@@ -251,8 +251,7 @@ class Spectral(BaseEstimator):
             self._hankel = None
             self._automaton = None
             return self
-        #self.polulate_dictionnaries_opt(X)
-        self.polulate_dictionnaries_async(X)
+        self.polulate_dictionnaries_opt(X)
         self._hankel = Hankel(sample_instance=X,
                          lrows=self.lrows, lcolumns=self.lcolumns,
                          version=self.version,
@@ -280,8 +279,6 @@ class Spectral(BaseEstimator):
         X.pref = {}  # dictionary (prefix,count)
         X.suff = {}  # dictionary (suffix,count)
         X.fact = {}  # dictionary (factor,count)
-        futures = []
-        pool = ThreadPoolExecutor(1)
         if self.partial:
             if isinstance(self.lrows, int):
                 lrowsmax = self.lrows
@@ -298,89 +295,21 @@ class Spectral(BaseEstimator):
             lmax = lrowsmax + lcolumnsmax
             #threads = []
             for line in range(X.shape[0]):
-                futures.append(pool.submit(self._populate_a_word, X, line, lrowsmax, version_rows_int,
-                               lcolumnsmax, version_columns_int, lmax))
-#                 self._populate_a_word(X, line, lrowsmax, version_rows_int,
-#                                       lcolumnsmax, version_columns_int, lmax)
-#                                                 )
-#                 threads.append(threading.Thread(target = self._populate_a_word,
-#                                                 args=(X, line, lrowsmax, version_rows_int,
-#                                                       lcolumnsmax, version_columns_int, lmax)
-#                                                 ).start())
+                self._populate_a_word(X, line, lrowsmax, version_rows_int,
+                                      lcolumnsmax, version_columns_int, lmax)
         else:
             for line in range(X.shape[0]):
-                futures.append(pool.submit(self._populate_a_word, X, line))
-#                self._populate_a_word(X, line)
-        wait(futures)
+                self._populate_a_word(X, line)
 
-    def _populate_a_word_locked(self, X, line, lrowsmax=None, version_rows_int=None,
-                         lcolumnsmax=None, version_columns_int=None, lmax=None):
-        w = X[line, :]
-        w = w[w >= 0]
-        w = tuple([int(x) for x in w[0:]])
-        X.sample[w] = X.sample.setdefault(w, 0) + 1
-        if self.version == "prefix" or self.version == "classic":
-            # empty word treatment for prefixe, suffix, and factor dictionnaries
-            with lock:
-                X.pref[()] = X.pref.setdefault((),0) + 1
-        if self.version == "suffix" or self.version == "classic":
-            with lock:
-                X.suff[()] = X.suff.setdefault((),0) + 1
-        if (self.version == "factor" or self.version == "suffix" or
-            self.version == "prefix"):
-            with lock:
-                X.fact[()] = X.fact.setdefault((),0) + len(w) + 1
-        if self.partial:
-            for i in range(len(w)):
-                if self.version == "classic":
-                    if ((version_rows_int and i + 1 <= lrowsmax) or
-                       (not version_rows_int and w[:i + 1] in self.lrows)):
-                        with lock:
-                            X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
-                    if ((version_columns_int and i + 1 <= lcolumnsmax) or
-                       (not version_columns_int and w[-( i + 1):] in self.lcolumns)):
-                        with lock:
-                            X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
-                elif self.version == "prefix":
-                    # dictionaries dpref is populated until
-                    # lmax = lrows + lcolumns
-                    # dictionaries dfact is populated until lcolumns
-                    if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or
-                         (not version_rows_int and w[:i + 1] in self.lrows) or
-                         (not version_columns_int  and w[:i + 1] in self.lcolumns)):
-                        with lock:
-                            X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
-                    for j in range(i + 1, len(w) + 1):
-                        if ((version_columns_int and (j - i) <= lmax) or 
-                            (not version_columns_int and w[i:j] in self.lcolumns)):
-                            with lock:
-                                X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
-                elif self.version == "suffix":
-                    if (((version_rows_int or version_columns_int) and i <= lmax) or
-                         (not version_rows_int and w[-(i + 1):] in self.lrows) or
-                         (not version_columns_int and w[-(i + 1):] in self.lcolumns)):
-                        with lock:
-                            X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
-                    for j in range(i + 1, len(w) + 1):
-                        if ((version_rows_int and (j - i) <= lmax) or
-                            (not version_rows_int and w[i:j] in self.lrows)):
-                            with lock:
-                                X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
-                elif self.version == "factor":
-                    for j in range(i + 1, len(w) + 1):
-                        if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or
-                             (not version_rows_int and w[i:j] in self.lrows) or
-                             (not version_columns_int and w[i:j] in self.lcolumns)):
-                            with lock:
-                                X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
-        else:  # not partial
-            for i in range(len(w)):
-                with lock:
-                    X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
-                    X.suff[w[i:]] = X.suff.setdefault(w[i:], 0) + 1
-                for j in range(i + 1, len(w) + 1):
-                    with lock:
-                        X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
+        if self.version == "classic":
+            X.fact = {}
+        elif self.version == "suffix":
+            X.pref = {}
+        elif self.version == "prefix":
+            X.suff = {}
+        elif self.version == "factor":
+            X.suff = {}
+            X.pref = {}
 
     def _populate_a_word(self, X, line, lrowsmax=None, version_rows_int=None,
                          lcolumnsmax=None, version_columns_int=None, lmax=None):
@@ -439,137 +368,6 @@ class Spectral(BaseEstimator):
                 for j in range(i + 1, len(w) + 1):
                     X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
 
-    def _populate_generator(self, X, lrowsmax=None, version_rows_int=None,
-                            lcolumnsmax=None, version_columns_int=None, lmax=None):
-        for line in range(X.shape[0]):
-            w = X[line, :]
-            w = w[w >= 0]
-            w = tuple([int(x) for x in w[0:]])
-            yield ('sample', w, 0)
-            if self.version == "prefix" or self.version == "classic":
-                # empty word treatment for prefixe, suffix, and factor dictionnaries
-                yield ('pref', (), 0)
-            if self.version == "suffix" or self.version == "classic":
-                yield ('suff', (), 0)
-            if (self.version == "factor" or self.version == "suffix" or
-                self.version == "prefix"):
-                yield ('fact', (), len(w))
-            if self.partial:
-                for i in range(len(w)):
-                    if self.version == "classic":
-                        if ((version_rows_int and i + 1 <= lrowsmax) or
-                           (not version_rows_int and w[:i + 1] in self.lrows)):
-                            yield ('pref', w[:i + 1], 0)
-                        if ((version_columns_int and i + 1 <= lcolumnsmax) or
-                           (not version_columns_int and w[-( i + 1):] in self.lcolumns)):
-                            yield ('suff', w[-(i + 1):], 0)
-                    elif self.version == "prefix":
-                        # dictionaries dpref is populated until
-                        # lmax = lrows + lcolumns
-                        # dictionaries dfact is populated until lcolumns
-                        if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or
-                             (not version_rows_int and w[:i + 1] in self.lrows) or
-                             (not version_columns_int  and w[:i + 1] in self.lcolumns)):
-                            yield ('pref', w[:i + 1], 0)
-                        for j in range(i + 1, len(w) + 1):
-                            if ((version_columns_int and (j - i) <= lmax) or 
-                                (not version_columns_int and w[i:j] in self.lcolumns)):
-                                yield ('fact', w[i:j], 0)
-                    elif self.version == "suffix":
-                        if (((version_rows_int or version_columns_int) and i <= lmax) or
-                             (not version_rows_int and w[-(i + 1):] in self.lrows) or
-                             (not version_columns_int and w[-(i + 1):] in self.lcolumns)):
-                            yield ('suff', w[-(i + 1):], 0)
-                        for j in range(i + 1, len(w) + 1):
-                            if ((version_rows_int and (j - i) <= lmax) or
-                                (not version_rows_int and w[i:j] in self.lrows)):
-                                yield ('fact', w[i:j], 0)
-                    elif self.version == "factor":
-                        for j in range(i + 1, len(w) + 1):
-                            if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or
-                                 (not version_rows_int and w[i:j] in self.lrows) or
-                                 (not version_columns_int and w[i:j] in self.lcolumns)):
-                                yield ('fact', w[i:j], 0)
-            else:  # not partial
-                for i in range(len(w)):
-                    yield ('pref', w[:i + 1], 0)
-                    yield ('suff', w[i:], 0)
-                    for j in range(i + 1, len(w) + 1):
-                        yield ('fact', w[i:j], 0)
-
-    def _populate_coroutine(self, d):
-        print("Ready to populate")
-        while True:
-            key, val = (yield)
-            d[key] = d.setdefault(key, 0) + val + 1
-
-    def _populate_each_value(self, d, s, key, val):
-        if val:
-            d[key] = d.setdefault(key, 0) + val + 1
-        else:
-            d[key] = d.setdefault(key, 0) + 1
-
-    def polulate_dictionnaries_async(self, X):
-        """Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X
-                
-        - Input:
-
-        :param SplearnArray X: object of shape [n_samples,n_features]
-               Training data
-        
-        """
-        if not isinstance(X, SplearnArray):
-            return X
-        X.sample = {}  # dictionary (word,count)
-        X.pref = {}  # dictionary (prefix,count)
-        X.suff = {}  # dictionary (suffix,count)
-        X.fact = {}  # dictionary (factor,count)
-        rsample = self._populate_coroutine(X.sample)
-        next(rsample)
-        rpref = self._populate_coroutine(X.pref)
-        next(rpref)
-        rsuff = self._populate_coroutine(X.suff)
-        next(rsuff)
-        rfact = self._populate_coroutine(X.fact)
-        next(rfact)
-        if self.partial:
-            if isinstance(self.lrows, int):
-                lrowsmax = self.lrows
-                version_rows_int = True
-            else:
-                version_rows_int = False
-                lrowsmax = self.lrows.__len__()
-            if isinstance(self.lcolumns, int):
-                lcolumnsmax = self.lcolumns
-                version_columns_int = True
-            else:
-                lcolumnsmax = self.lcolumns.__len__()
-                version_columns_int = False
-            lmax = lrowsmax + lcolumnsmax
-            for s, key, val in self._populate_generator(X, lrowsmax, version_rows_int, lcolumnsmax, version_columns_int, lmax):
-#                 d = getattr(X, s)
-#                 self._populate_each_value(d, s, key, val)
-                if s == 'fact':
-                    rfact.send((key, val))
-                elif s == 'pref':
-                    rpref.send((key, val))
-                elif s == 'suff':
-                    rsuff.send((key, val))
-                else:
-                    rsample.send((key, val))
-        else:
-            for s, key, val in self._populate_generator(X):
-#                 d = getattr(X, s)
-#                 self._populate_each_value(d, s, key, val)
-                if s == 'fact':
-                    rfact.send((key, val))
-                elif s == 'pref':
-                    rpref.send((key, val))
-                elif s == 'suff':
-                    rsuff.send((key, val))
-                else:
-                    rsample.send((key, val))
-
     def polulate_dictionnaries(self, X):
         """Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X
                 
-- 
GitLab