diff --git a/splearn/spectral.py b/splearn/spectral.py index 19c7f09616b84e73f0a139dc11799165aa344d7b..22f27f10ce40cd7c5ef16578bdc9e5fc83282b4b 100644 --- a/splearn/spectral.py +++ b/splearn/spectral.py @@ -42,6 +42,9 @@ from __future__ import division, print_function import numpy as np import math import warnings +import threading + +lock = threading.RLock() from splearn.datasets.data_sample import SplearnArray from splearn.hankel import Hankel @@ -288,70 +291,147 @@ class Spectral(BaseEstimator): lcolumnsmax = self.lcolumns.__len__() version_columns_int = False lmax = lrowsmax + lcolumnsmax + #threads = [] for line in range(X.shape[0]): - self._populate_a_word(X, line, lrowsmax, version_rows_int, lcolumnsmax, version_columns_int, lmax) + self._populate_a_word(X, line, lrowsmax, version_rows_int, + lcolumnsmax, version_columns_int, lmax) +# ) +# threads.append(threading.Thread(target = self._populate_a_word, +# args=(X, line, lrowsmax, version_rows_int, +# lcolumnsmax, version_columns_int, lmax) +# ).start()) else: for line in range(X.shape[0]): self._populate_a_word(X, line) return X - def _populate_a_word(self, X, line, lrowsmax=None, version_rows_int=None, + def _populate_a_word_locked(self, X, line, lrowsmax=None, version_rows_int=None, lcolumnsmax=None, version_columns_int=None, lmax=None): - w = X[line, :] - w = w[w >= 0] - w = tuple([int(x) for x in w[0:]]) - X.sample[w] = X.sample.setdefault(w, 0) + 1 - if self.version == "prefix" or self.version == "classic": - # empty word treatment for prefixe, suffix, and factor dictionnaries + w = X[line, :] + w = w[w >= 0] + w = tuple([int(x) for x in w[0:]]) + X.sample[w] = X.sample.setdefault(w, 0) + 1 + if self.version == "prefix" or self.version == "classic": + # empty word treatment for prefixe, suffix, and factor dictionnaries + with lock: X.pref[()] = X.pref.setdefault((),0) + 1 - if self.version == "suffix" or self.version == "classic": + if self.version == "suffix" or self.version == "classic": + with lock: X.suff[()] = X.suff.setdefault((),0) + 1 - if (self.version == "factor" or self.version == "suffix" or - self.version == "prefix"): + if (self.version == "factor" or self.version == "suffix" or + self.version == "prefix"): + with lock: X.fact[()] = X.fact.setdefault((),0) + len(w) + 1 - if self.partial: - for i in range(len(w)): - if self.version == "classic": - if ((version_rows_int and i + 1 <= lrowsmax) or - (not version_rows_int and w[:i + 1] in self.lrows)): + if self.partial: + for i in range(len(w)): + if self.version == "classic": + if ((version_rows_int and i + 1 <= lrowsmax) or + (not version_rows_int and w[:i + 1] in self.lrows)): + with lock: X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 - if ((version_columns_int and i + 1 <= lcolumnsmax) or - (not version_columns_int and w[-( i + 1):] in self.lcolumns)): + if ((version_columns_int and i + 1 <= lcolumnsmax) or + (not version_columns_int and w[-( i + 1):] in self.lcolumns)): + with lock: X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1 - elif self.version == "prefix": - # dictionaries dpref is populated until - # lmax = lrows + lcolumns - # dictionaries dfact is populated until lcolumns - if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or - (not version_rows_int and w[:i + 1] in self.lrows) or - (not version_columns_int and w[:i + 1] in self.lcolumns)): + elif self.version == "prefix": + # dictionaries dpref is populated until + # lmax = lrows + lcolumns + # dictionaries dfact is populated until lcolumns + if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or + (not version_rows_int and w[:i + 1] in self.lrows) or + (not version_columns_int and w[:i + 1] in self.lcolumns)): + with lock: X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 - for j in range(i + 1, len(w) + 1): - if ((version_columns_int and (j - i) <= lmax) or - (not version_columns_int and w[i:j] in self.lcolumns)): + for j in range(i + 1, len(w) + 1): + if ((version_columns_int and (j - i) <= lmax) or + (not version_columns_int and w[i:j] in self.lcolumns)): + with lock: X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 - elif self.version == "suffix": - if (((version_rows_int or version_columns_int) and i <= lmax) or - (not version_rows_int and w[-(i + 1):] in self.lrows) or - (not version_columns_int and w[-(i + 1):] in self.lcolumns)): + elif self.version == "suffix": + if (((version_rows_int or version_columns_int) and i <= lmax) or + (not version_rows_int and w[-(i + 1):] in self.lrows) or + (not version_columns_int and w[-(i + 1):] in self.lcolumns)): + with lock: X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1 - for j in range(i + 1, len(w) + 1): - if ((version_rows_int and (j - i) <= lmax) or - (not version_rows_int and w[i:j] in self.lrows)): + for j in range(i + 1, len(w) + 1): + if ((version_rows_int and (j - i) <= lmax) or + (not version_rows_int and w[i:j] in self.lrows)): + with lock: X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 - elif self.version == "factor": - for j in range(i + 1, len(w) + 1): - if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or - (not version_rows_int and w[i:j] in self.lrows) or - (not version_columns_int and w[i:j] in self.lcolumns)): + elif self.version == "factor": + for j in range(i + 1, len(w) + 1): + if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or + (not version_rows_int and w[i:j] in self.lrows) or + (not version_columns_int and w[i:j] in self.lcolumns)): + with lock: X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 - else: # not partial - for i in range(len(w)): + else: # not partial + for i in range(len(w)): + with lock: X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 X.suff[w[i:]] = X.suff.setdefault(w[i:], 0) + 1 - for j in range(i + 1, len(w) + 1): + for j in range(i + 1, len(w) + 1): + with lock: X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 + def _populate_a_word(self, X, line, lrowsmax=None, version_rows_int=None, + lcolumnsmax=None, version_columns_int=None, lmax=None): + w = X[line, :] + w = w[w >= 0] + w = tuple([int(x) for x in w[0:]]) + X.sample[w] = X.sample.setdefault(w, 0) + 1 + if self.version == "prefix" or self.version == "classic": + # empty word treatment for prefixe, suffix, and factor dictionnaries + X.pref[()] = X.pref.setdefault((),0) + 1 + if self.version == "suffix" or self.version == "classic": + X.suff[()] = X.suff.setdefault((),0) + 1 + if (self.version == "factor" or self.version == "suffix" or + self.version == "prefix"): + X.fact[()] = X.fact.setdefault((),0) + len(w) + 1 + if self.partial: + for i in range(len(w)): + if self.version == "classic": + if ((version_rows_int and i + 1 <= lrowsmax) or + (not version_rows_int and w[:i + 1] in self.lrows)): + X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 + if ((version_columns_int and i + 1 <= lcolumnsmax) or + (not version_columns_int and w[-( i + 1):] in self.lcolumns)): + X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1 + elif self.version == "prefix": + # dictionaries dpref is populated until + # lmax = lrows + lcolumns + # dictionaries dfact is populated until lcolumns + if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or + (not version_rows_int and w[:i + 1] in self.lrows) or + (not version_columns_int and w[:i + 1] in self.lcolumns)): + X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 + for j in range(i + 1, len(w) + 1): + if ((version_columns_int and (j - i) <= lmax) or + (not version_columns_int and w[i:j] in self.lcolumns)): + X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 + elif self.version == "suffix": + if (((version_rows_int or version_columns_int) and i <= lmax) or + (not version_rows_int and w[-(i + 1):] in self.lrows) or + (not version_columns_int and w[-(i + 1):] in self.lcolumns)): + X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1 + for j in range(i + 1, len(w) + 1): + if ((version_rows_int and (j - i) <= lmax) or + (not version_rows_int and w[i:j] in self.lrows)): + X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 + elif self.version == "factor": + for j in range(i + 1, len(w) + 1): + if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or + (not version_rows_int and w[i:j] in self.lrows) or + (not version_columns_int and w[i:j] in self.lcolumns)): + X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 + else: # not partial + for i in range(len(w)): + X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1 + X.suff[w[i:]] = X.suff.setdefault(w[i:], 0) + 1 + for j in range(i + 1, len(w) + 1): + X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1 + + def polulate_dictionnaries(self, X): """Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X