# -*- coding: utf-8 -*-
# ######### COPYRIGHT #########
#
# Copyright(c) 2016-2018
# -----------------
#
# * LabEx Archimède: http://labex-archimede.univ-amu.fr/
# * Laboratoire d'Informatique et Systèmes : http://www.lis-lab.fr/
#
# Contributors:
# ------------
#
# * François Denis <francois.denis_AT_lis-lab.fr>
# * Rémi Eyraud <remi.eyraud_AT_lis-lab.fr>
# * Denis Arrivault <contact.dev_AT_lis-lab.fr>
# * Dominique Benielli <dominique.benielli_AT_univ-amu.fr>
#
# Description:
# -----------
#
# scikit-splearn is a toolbox in
# python for spectral learning algorithms.
#
# Version:
# -------
#
# * splearn version = 1.1.0
#
# Licence:
# -------
#
# License: 3-clause BSD
#
#
# ######### COPYRIGHT #########
"""This module contains the Spectral and Learning class
.. moduleauthor:: François Denis
"""
from __future__ import division, print_function
import numpy as np
import math
import warnings
import threading
lock = threading.RLock()
from splearn.datasets.data_sample import SplearnArray
from splearn.hankel import Hankel
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.utils.validation import NotFittedError
class Spectral(BaseEstimator):
"""A Spectral estimator instance
- Input:
:param int rank: the ranking number
:param lrows: (default value = 7) number or list of rows
a list of strings or an integer indicating the max length
of elements to consider if partial=True
otherwise, based on self.pref if version="classic" or
"prefix", self.fact otherwise
:type lrows: int or tuple of int
:param lcolumns: (default value = 7) number or list of columns
a list of strings or an integer indicating the max length
of elements to consider if partial=True
otherwise, based on self.suff if version="classic" or "suffix",
self.fact otherwise
:type lcolumns: int or tuple of int
:param string version: (default value = "classic") version name
:param boolean partial: (default value = False) build
of partial Hankel matrix
:param boolean sparse: (default value = False) True if Hankel
matrix is sparse
:param string smooth_method: (default value = "none") method of smoothing
- 'trigram' the 3-Gram trigram dict
is computed and used by the predict function,
in this case the threeGram probability is used instead of Spectral
probability in negative case
- 'none' or something else no smooth method is used
in predict function.
:param boolean mode_quiet: (default value = False) True for no
output message.
:Example:
>>> from splearn.spectral import Spectral
>>> sp = Spectral()
>>> sp.set_params(partial=True, lcolumns=6, lrows=6, smooth_method='trigram')
Spectral(lcolumns=6, lrows=6, mode_quiet=False, partial=True, rank=5,
smooth_method='trigram', sparse=True, version='classic')
>>> sp.fit(data.data)
Start Hankel matrix computation
End of Hankel matrix computation
Start Building Automaton from Hankel matrix
End of Automaton computation
Spectral(lcolumns=6, lrows=6, partial=True, rank=5, smooth_method='trigram', sparse=True, version='classic')
>>> sp.automaton.initial
array([-0.00049249, 0.00304676, -0.04405996, -0.10765322, -0.08660063])
>>> sp.predict(data.data)
array([ 4.38961058e-04, 1.10616861e-01, 1.35569353e-03, ...,
4.66041996e-06, 4.68177275e-02, 5.24287604e-20])
>>> sp.loss(data.data, normalize=True)
-10.530029936056017
>>> sp.score(data.data)
10.530029936056017
"""
    def __init__(self, rank=5, lrows=7, lcolumns=7,
                 version='classic', partial=True,
                 sparse=True, smooth_method='none',
                 mode_quiet=False):
        """Build a Spectral estimator with the given hyper-parameters.

        See the class docstring for the meaning of each parameter.
        """
        self.version = version
        self.partial = partial
        self.sparse = sparse
        self.lrows = lrows
        self.lcolumns = lcolumns
        self.rank = rank
        # trigram dictionary, filled by fit() when smooth_method == 'trigram'
        self.trigram = {}
        # smooth_method must be assigned BEFORE _rule_smooth_method runs,
        # because that helper validates (and may reset) self.smooth_method.
        self.smooth_method = smooth_method
        self._rule_smooth_method(smooth_method)
        self.mode_quiet = mode_quiet
        # _automaton and _hankel are computed by fit(); they are exposed
        # read-only through the `automaton` and `hankel` properties.
        self._automaton = None
        self._hankel = None
def get_params(self, deep=True):
# suppose this estimator has parameters
"""
return parameters values of Spectral estimator
- Output:
:returns: parameters dictionary of Spectral estimator name : value
:rtype: dict
"""
return {"rank": self.rank, "version": self.version,
"lrows": self.lrows, "lcolumns": self.lcolumns,
"partial": self.partial,
"sparse": self.sparse,
"smooth_method" : self.smooth_method,
"mode_quiet" : self.mode_quiet
}
    @property
    def automaton(self):
        """Automaton build by the fit method. None by default"""
        return self._automaton
    @automaton.setter
    def automaton(self, automaton):
        # Intentionally a no-op: the automaton is read-only from outside and
        # may only be (re)built by fit(); external assignments are ignored.
        pass
    @property
    def hankel(self):
        """Hankel build by the fit method. None by default"""
        return self._hankel
    @hankel.setter
    def hankel(self, hankel):
        # Intentionally a no-op: the Hankel matrix is read-only from outside
        # and may only be (re)built by fit(); external assignments are ignored.
        pass
def _rule_smooth_method(self, value):
if self.smooth_method not in ['none', 'trigram']:
warnings.warn("smooth method should be in ['none', 'trigram']",
UserWarning)
self.smooth_method = 'none'
if value == 'trigram':
self.smooth = 1
else:
self.smooth = 0
def set_params(self, **parameters):
"""set the values of Spectral estimator parameters
- Output:
:returns: Spectral estimator with new parameters
:rtype: Spectral
"""
for parameter, value in parameters.items():
self.__setattr__(parameter, value)
if parameter == "smooth_method":
self._rule_smooth_method(value)
return self
def fit(self, X, y=None):
"""Fit the model
- Input:
:param SplearnArray X: object of shape [n_samples,n_features]
Training data
:param ndarray y: (default value = None) not used by Spectral estimator
numpy array of shape [n_samples] Target values
- Output:
:returns: Spectral itself with an automaton attribute instanced
returns an instance of self.
:rtype: Spectral
"""
check_array(X)
if not isinstance(X, SplearnArray):
self._hankel = None
self._automaton = None
return self
X = self.polulate_dictionnaries(X)
self._hankel = Hankel(sample_instance=X,
lrows=self.lrows, lcolumns=self.lcolumns,
version=self.version,
partial=self.partial, sparse=self.sparse,
mode_quiet=self.mode_quiet)
self._automaton = self._hankel.to_automaton(self.rank, self.mode_quiet)
# for smooth option compute trigram dictionnary
if self.smooth == 1:
self.trigram = self._threegramdict(X.sample)
return self
def fit_opt(self, X, y=None):
"""Fit the model in a optimal way
- Input:
:param SplearnArray X: object of shape [n_samples,n_features]
Training data
:param ndarray y: (default value = None) not used by Spectral estimator
numpy array of shape [n_samples] Target values
- Output:
:returns: Spectral itself with an automaton attribute instanced
returns an instance of self.
:rtype: Spectral
"""
check_array(X)
if not isinstance(X, SplearnArray):
self._hankel = None
self._automaton = None
return self
X = self.polulate_dictionnaries_opt(X)
self._hankel = Hankel(sample_instance=X,
lrows=self.lrows, lcolumns=self.lcolumns,
version=self.version,
partial=self.partial, sparse=self.sparse,
mode_quiet=self.mode_quiet)
self._automaton = self._hankel.to_automaton(self.rank, self.mode_quiet)
# for smooth option compute trigram dictionnary
if self.smooth == 1:
self.trigram = self._threegramdict(X.sample)
return self
def polulate_dictionnaries_opt(self, X):
"""Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X
- Input:
:param SplearnArray X: object of shape [n_samples,n_features]
Training data
"""
if not isinstance(X, SplearnArray):
return X
X.sample = {} # dictionary (word,count)
X.pref = {} # dictionary (prefix,count)
X.suff = {} # dictionary (suffix,count)
X.fact = {} # dictionary (factor,count)
if self.partial:
if isinstance(self.lrows, int):
lrowsmax = self.lrows
version_rows_int = True
else:
version_rows_int = False
lrowsmax = self.lrows.__len__()
if isinstance(self.lcolumns, int):
lcolumnsmax = self.lcolumns
version_columns_int = True
else:
lcolumnsmax = self.lcolumns.__len__()
version_columns_int = False
lmax = lrowsmax + lcolumnsmax
#threads = []
for line in range(X.shape[0]):
self._populate_a_word(X, line, lrowsmax, version_rows_int,
lcolumnsmax, version_columns_int, lmax)
# )
# threads.append(threading.Thread(target = self._populate_a_word,
# args=(X, line, lrowsmax, version_rows_int,
# lcolumnsmax, version_columns_int, lmax)
# ).start())
else:
for line in range(X.shape[0]):
self._populate_a_word(X, line)
return X
def _populate_a_word_locked(self, X, line, lrowsmax=None, version_rows_int=None,
lcolumnsmax=None, version_columns_int=None, lmax=None):
w = X[line, :]
w = w[w >= 0]
w = tuple([int(x) for x in w[0:]])
X.sample[w] = X.sample.setdefault(w, 0) + 1
if self.version == "prefix" or self.version == "classic":
# empty word treatment for prefixe, suffix, and factor dictionnaries
with lock:
X.pref[()] = X.pref.setdefault((),0) + 1
if self.version == "suffix" or self.version == "classic":
with lock:
X.suff[()] = X.suff.setdefault((),0) + 1
if (self.version == "factor" or self.version == "suffix" or
self.version == "prefix"):
with lock:
X.fact[()] = X.fact.setdefault((),0) + len(w) + 1
if self.partial:
for i in range(len(w)):
if self.version == "classic":
if ((version_rows_int and i + 1 <= lrowsmax) or
(not version_rows_int and w[:i + 1] in self.lrows)):
with lock:
X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
if ((version_columns_int and i + 1 <= lcolumnsmax) or
(not version_columns_int and w[-( i + 1):] in self.lcolumns)):
with lock:
X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
elif self.version == "prefix":
# dictionaries dpref is populated until
# lmax = lrows + lcolumns
# dictionaries dfact is populated until lcolumns
if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or
(not version_rows_int and w[:i + 1] in self.lrows) or
(not version_columns_int and w[:i + 1] in self.lcolumns)):
with lock:
X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
for j in range(i + 1, len(w) + 1):
if ((version_columns_int and (j - i) <= lmax) or
(not version_columns_int and w[i:j] in self.lcolumns)):
with lock:
X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
elif self.version == "suffix":
if (((version_rows_int or version_columns_int) and i <= lmax) or
(not version_rows_int and w[-(i + 1):] in self.lrows) or
(not version_columns_int and w[-(i + 1):] in self.lcolumns)):
with lock:
X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
for j in range(i + 1, len(w) + 1):
if ((version_rows_int and (j - i) <= lmax) or
(not version_rows_int and w[i:j] in self.lrows)):
with lock:
X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
elif self.version == "factor":
for j in range(i + 1, len(w) + 1):
if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or
(not version_rows_int and w[i:j] in self.lrows) or
(not version_columns_int and w[i:j] in self.lcolumns)):
with lock:
X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
else: # not partial
for i in range(len(w)):
with lock:
X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
X.suff[w[i:]] = X.suff.setdefault(w[i:], 0) + 1
for j in range(i + 1, len(w) + 1):
with lock:
X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
    def _populate_a_word(self, X, line, lrowsmax=None, version_rows_int=None,
                         lcolumnsmax=None, version_columns_int=None, lmax=None):
        """Update the *sample*, *pref*, *suff*, *fact* dictionaries of ``X``
        with the word stored in row ``line``.

        The row is truncated at its first negative entry (padding) and turned
        into a tuple of ints.  Which dictionaries are updated depends on
        ``self.version``; in partial mode the given length bounds restrict
        which prefixes/suffixes/factors are counted.

        - Input:

        :param SplearnArray X: training data whose dictionaries are updated
        :param int line: row index of the word to process
        :param lrowsmax: maximal prefix length (or number of rows); only
            meaningful when ``self.partial`` is True
        :param bool version_rows_int: True if ``self.lrows`` is an integer
        :param lcolumnsmax: maximal suffix length (or number of columns)
        :param bool version_columns_int: True if ``self.lcolumns`` is an integer
        :param lmax: ``lrowsmax + lcolumnsmax``
        """
        w = X[line, :]
        # strip the negative padding values, keep the actual symbols
        w = w[w >= 0]
        w = tuple([int(x) for x in w[0:]])
        X.sample[w] = X.sample.setdefault(w, 0) + 1
        if self.version == "prefix" or self.version == "classic":
            # empty word treatment for prefixe, suffix, and factor dictionnaries
            X.pref[()] = X.pref.setdefault((),0) + 1
        if self.version == "suffix" or self.version == "classic":
            X.suff[()] = X.suff.setdefault((),0) + 1
        if (self.version == "factor" or self.version == "suffix" or
                self.version == "prefix"):
            # the empty factor occurs len(w) + 1 times in a word of length len(w)
            X.fact[()] = X.fact.setdefault((),0) + len(w) + 1
        if self.partial:
            for i in range(len(w)):
                if self.version == "classic":
                    if ((version_rows_int and i + 1 <= lrowsmax) or
                            (not version_rows_int and w[:i + 1] in self.lrows)):
                        X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
                    if ((version_columns_int and i + 1 <= lcolumnsmax) or
                            (not version_columns_int and w[-( i + 1):] in self.lcolumns)):
                        X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
                elif self.version == "prefix":
                    # dictionaries dpref is populated until
                    # lmax = lrows + lcolumns
                    # dictionaries dfact is populated until lcolumns
                    if (((version_rows_int or version_columns_int) and i + 1 <= lmax) or
                            (not version_rows_int and w[:i + 1] in self.lrows) or
                            (not version_columns_int and w[:i + 1] in self.lcolumns)):
                        X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
                    for j in range(i + 1, len(w) + 1):
                        # NOTE(review): the bound here is lmax although the
                        # comment above says lcolumns -- confirm intended.
                        if ((version_columns_int and (j - i) <= lmax) or
                                (not version_columns_int and w[i:j] in self.lcolumns)):
                            X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
                elif self.version == "suffix":
                    # NOTE(review): this branch tests ``i <= lmax`` where the
                    # other branches use ``i + 1 <= lmax`` -- confirm intended.
                    if (((version_rows_int or version_columns_int) and i <= lmax) or
                            (not version_rows_int and w[-(i + 1):] in self.lrows) or
                            (not version_columns_int and w[-(i + 1):] in self.lcolumns)):
                        X.suff[w[-(i + 1):]] = X.suff.setdefault(w[-(i + 1):], 0) + 1
                    for j in range(i + 1, len(w) + 1):
                        if ((version_rows_int and (j - i) <= lmax) or
                                (not version_rows_int and w[i:j] in self.lrows)):
                            X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
                elif self.version == "factor":
                    for j in range(i + 1, len(w) + 1):
                        if (((version_rows_int or version_columns_int) and (j - i) <= lmax) or
                                (not version_rows_int and w[i:j] in self.lrows) or
                                (not version_columns_int and w[i:j] in self.lcolumns)):
                            X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
        else:  # not partial
            # every prefix, suffix and factor of the word is counted
            for i in range(len(w)):
                X.pref[w[:i + 1]] = X.pref.setdefault(w[:i + 1], 0) + 1
                X.suff[w[i:]] = X.suff.setdefault(w[i:], 0) + 1
                for j in range(i + 1, len(w) + 1):
                    X.fact[w[i:j]] = X.fact.setdefault(w[i:j], 0) + 1
    def polulate_dictionnaries(self, X):
        """Populates the *sample*, *pref*, *suff*, *fact* dictionnaries of X

        Builds fresh (word, count), (prefix, count), (suffix, count) and
        (factor, count) dictionaries from the rows of ``X`` and assigns the
        ones relevant to ``self.version`` onto ``X`` before returning it.

        - Input:

        :param SplearnArray X: object of shape [n_samples,n_features]
            Training data
        """
        if not isinstance(X, SplearnArray):
            return X
        dsample = {}  # dictionary (word,count)
        dpref = {}  # dictionary (prefix,count)
        dsuff = {}  # dictionary (suffix,count)
        dfact = {}  # dictionary (factor,count)
        for line in range(X.shape[0]):
            w = X[line, :]
            # strip negative padding, keep symbols as a tuple of ints
            w = w[w >= 0]
            w = tuple([int(x) for x in w[0:]])
            dsample[w] = dsample[w] + 1 if w in dsample else 1
            if self.version == "prefix" or self.version == "classic":
                # empty word treatment for prefixe, suffix, and factor dictionnaries
                dpref[()] = dpref[()] + 1 if () in dpref else 1
            if self.version == "suffix" or self.version == "classic":
                dsuff[()] = dsuff[()] + 1 if () in dsuff else 1
            if self.version == "factor" or self.version == "suffix" \
                    or self.version == "prefix":
                # the empty factor occurs len(w)+1 times in a word of length len(w)
                dfact[()] = dfact[()] + len(w) + 1 if () in dfact else len(w) + 1
            if self.partial:
                # NOTE(review): these bounds are loop-invariant and could be
                # hoisted out of the per-line loop; recomputing them here is
                # redundant but harmless.
                if isinstance(self.lrows, int):
                    lrowsmax = self.lrows
                    version_rows_int = True
                else:
                    version_rows_int = False
                    lrowsmax = self.lrows.__len__()
                if isinstance(self.lcolumns, int):
                    lcolumnsmax = self.lcolumns
                    version_columns_int = True
                else:
                    lcolumnsmax = self.lcolumns.__len__()
                    version_columns_int = False
                lmax = lrowsmax + lcolumnsmax
                for i in range(len(w)):
                    if self.version == "classic":
                        if (version_rows_int is True and
                                i + 1 <= lrowsmax) or \
                                (version_rows_int is False and
                                 w[:i + 1] in self.lrows):
                            dpref[w[:i + 1]] = \
                                dpref[w[:i + 1]] + 1 if w[:i + 1] in dpref else 1
                        if (version_columns_int is True and
                                i + 1 <= lcolumnsmax) or \
                                (version_columns_int is False and
                                 w[-( i + 1):] in self.lcolumns):
                            dsuff[w[-(i + 1):]] = dsuff[w[-(i + 1):]] + 1 \
                                if w[-(i + 1):] in dsuff else 1
                    if self.version == "prefix":
                        # dictionaries dpref is populated until
                        # lmax = lrows + lcolumns
                        # dictionaries dfact is populated until lcolumns
                        if ((version_rows_int is True or
                             version_columns_int is True) and
                                i + 1 <= lmax) or \
                                (version_rows_int is False and
                                 (w[:i + 1] in self.lrows)) or \
                                (version_columns_int is False and
                                 (w[:i + 1] in self.lcolumns)):
                            dpref[w[:i + 1]] = dpref[w[:i + 1]] + 1 \
                                if w[:i + 1] in dpref else 1
                        for j in range(i + 1, len(w) + 1):
                            # NOTE(review): the bound here is lmax although
                            # the comment above says lcolumns -- confirm.
                            if (version_columns_int is True and (
                                    j - i) <= lmax) or \
                                    (version_columns_int is False and
                                     (w[i:j] in self.lcolumns)):
                                dfact[w[i:j]] = dfact[w[i:j]] + 1 \
                                    if w[i:j] in dfact else 1
                    if self.version == "suffix":
                        # NOTE(review): tests ``i <= lmax`` where other
                        # branches use ``i + 1 <= lmax`` -- confirm intended.
                        if ((version_rows_int is True or
                             version_columns_int is True) and
                                i <= lmax) or \
                                (version_rows_int is False and
                                 (w[-(i + 1):] in self.lrows)) or \
                                (version_columns_int is False and
                                 (w[-(i + 1):] in self.lcolumns)):
                            dsuff[w[-(i + 1):]] = dsuff[w[-(i + 1):]] + 1 \
                                if w[-(i + 1):] in dsuff else 1
                        for j in range(i + 1, len(w) + 1):
                            if (version_rows_int is True and (
                                    j - i) <= lmax) or \
                                    (version_rows_int is False and
                                     (w[i:j] in self.lrows)):
                                dfact[w[i:j]] = dfact[w[i:j]] + 1 \
                                    if w[i:j] in dfact else 1
                    if self.version == "factor":
                        for j in range(i + 1, len(w) + 1):
                            if ((version_rows_int is True or
                                 version_columns_int is True) and
                                    (j - i) <= lmax) or \
                                    (version_rows_int is False and
                                     (w[i:j] in self.lrows)) or \
                                    (version_columns_int is False and
                                     (w[i:j] in self.lcolumns)):
                                dfact[w[i:j]] = \
                                    dfact[w[i:j]] + 1 if w[i:j] in dfact else 1
            else:  # not partial
                # every prefix, suffix and factor of the word is counted
                for i in range(len(w)):
                    dpref[w[:i + 1]] = dpref[w[:i + 1]] + 1 \
                        if w[:i + 1] in dpref else 1
                    dsuff[w[i:]] = dsuff[w[i:]] + 1 if w[i:] in dsuff else 1
                    for j in range(i + 1, len(w) + 1):
                        dfact[w[i:j]] = dfact[w[i:j]] + 1 \
                            if w[i:j] in dfact else 1
        # only the dictionaries relevant to the chosen version are kept;
        # the others are reset to empty dicts
        X.sample = dsample
        if self.version == "classic":
            X.pref = dpref
            X.suff = dsuff
            X.fact = {}
        if self.version == "suffix":
            X.suff = dsuff
            X.fact = dfact
            X.pref = {}
        if self.version == "prefix":
            X.pref = dpref
            X.fact = dfact
            X.suff = {}
        if self.version == "factor":
            X.fact = dfact
            X.suff = {}
            X.pref = {}
        return X
def _populate_sample_dict(self, X):
dsample = {} # dictionary (word,count)
for line in range(X.shape[0]):
w = X[line, :]
w = w[w >= 0]
w = tuple([int(x) for x in w[0:]])
dsample[w] = dsample[w] + 1 if w in dsample else 1
return dsample
@property
def trigram(self):
"""The trigram dictionary"""
return self._trigram
@trigram.setter
def trigram(self, DPdict_values):
if (not isinstance(DPdict_values, dict)):
mess = "DPdict should be a dicionary.\n"
mess += "Actual : " + str(DPdict_values)
raise TypeError(mess)
self._trigram = DPdict_values
    def _trigramprobability(self, sequence, trigram_test):
        """Return the smoothed trigram probability of ``sequence``.

        The word is padded with two leading -1 start markers and one trailing
        -2 end marker; for every bigram context the conditional probability of
        the next symbol is estimated from the training trigram dictionary
        (``self.trigram``), the test trigram dictionary (``trigram_test``), or
        the pooled counts when the context occurs in both.  Returns 0 as soon
        as a context is unknown to both dictionaries.

        :param sequence: iterable of int symbols (the word)
        :param dict trigram_test: trigram dictionary built from the test sample
        :returns: product of the per-position conditional probabilities
        :rtype: np.float64
        """
        prob = np.float64(1.0)
        seq = list(sequence)
        ngramseq = [-1, -1] + seq + [-2]
        # NOTE(review): this guard is dead code -- len() is never negative;
        # presumably ``len(seq) == 0`` was intended. Left unchanged. The loop
        # below handles the empty word through the (-1, -1) -> -2 transition.
        if len(seq) < 0:
            return 0
        for start in range(len(ngramseq) - 2):
            end = start + 2
            # counts from the training trigram dictionary; -1 means
            # "context unseen", the key -1 inside a table holds the total
            if tuple(ngramseq[start:end]) in self.trigram.keys():
                if ngramseq[end] in self.trigram[tuple(ngramseq[start:end])].keys():
                    val1_train = np.float64(
                        self.trigram[tuple(ngramseq[start:end])][ngramseq[end]])
                else:
                    val1_train = 0
                val2_train = np.float64(self.trigram[tuple(ngramseq[start:end])][-1])
            else:
                val1_train = -1
                val2_train = -1
            # same lookup in the test trigram dictionary
            if tuple(ngramseq[start:end]) in trigram_test.keys():
                if ngramseq[end] in trigram_test[
                        tuple(ngramseq[start:end])].keys():
                    val1_test = np.float64(
                        trigram_test[tuple(ngramseq[start:end])][
                            ngramseq[end]])
                else:
                    val1_test = 0
                val2_test = np.float64(
                    trigram_test[tuple(ngramseq[start:end])][-1])
            else:
                val1_test = -1
                val2_test = -1
            # context unknown everywhere: probability collapses to 0
            if val1_test == -1 and val1_train == -1:
                return 0
            if val1_test == -1:
                prob = prob * val1_train / val2_train
            if val1_train == -1:
                prob = prob * val1_test / val2_test
            # seen in both: pool the counts
            if val1_test != -1 and val1_train != -1:
                prob = prob * (val1_test + val1_train) / ( val2_test + val2_train)
        return prob
def nb_trigram(self):
"""return the number of index affected by the trigram computation
- Output:
:returns: int number of trigram_index
"""
try:
nb = np.where(self.trigram_index == True)[0].shape[0]
return nb
except:
warnings.warn(UserWarning, "trigram_index does not exist")
pass
@staticmethod
def _threegramdict(sample):
DPdict = dict()
for sequence in sample.keys():
seq = list(sequence)
ngramseq = [-1, -1] + seq + [-2]
for start in range(len(ngramseq) - 2):
end = start + 2
if tuple(ngramseq[start:end]) in DPdict:
table = DPdict[tuple(ngramseq[start:end])]
if ngramseq[end] in table:
table[ngramseq[end]] = table[ngramseq[end]] + sample[
sequence]
else:
table[ngramseq[end]] = sample[sequence]
table[-1] = table[-1] + sample[sequence]
else:
table = dict()
table[ngramseq[end]] = sample[sequence]
table[-1] = sample[sequence]
DPdict[tuple(ngramseq[start:end])] = table
return DPdict
def predict(self, X):
"""Predict using the Spectral model
- Input:
:param SplearnArray X : of shape data shape = (n_samples, n_features)
Samples.
- Output:
:returns: Probability corresponding to the input X,
array-like of shape = n_samples
:rtype: ndarray
"""
check_array(X)
if not hasattr(self, 'automaton'):
raise NotFittedError("This %(name)s instance is not fitted "
"yet" % {'name': type(self).__name__})
if self._automaton is None:
print("No Automaton has been computed, "
"check the format of the input fit data")
warnings.warn("check the format of the input fit data", UserWarning)
return X
Y = self.predict_proba(X)
return Y
def predict_proba(self, X):
"""
Predict probability using the Spectral model
- Input:
:param SplearnArray X : Samples, data shape = (n_samples, n_features)
- Output:
:returns: Probability corresponding to the input X
of shape = (n_samples)
:rtype: ndarray
"""
#check_is_fitted(self, "classes_")
X = check_array(X)
if not hasattr(self, 'automaton'):
raise NotFittedError("This %(name)s instance is not fitted "
"yet" % {'name': type(self).__name__})
# if Automaton is None because the fit pass through doing nothing
if self._automaton is None:
print("No Automaton has been computed, "
"check the format of the input fit data")
warnings.warn("check the format of the input fit data", UserWarning)
return X[:,0]
# if self.smooth == 1 and self.trigram == {}:
# warnings.warn("Incompatibility of smooth_method "
# " activate trigram smooth option in predictor "
# " and fit again", UserWarning)
# self.trigram = self._threegramdict(X.sample)
if self.smooth == 1:
test_sample = self._populate_sample_dict(X=X)
trigram_test = self._threegramdict(test_sample)
trigram_index = np.zeros(X.shape[0], dtype=bool)
Y = np.zeros(X.shape[0])
i = 0
for line in range(X.shape[0]):
w = X[line, :]
w = w[w >= 0]
w = tuple([int(x) for x in w[0:]])
val = self._automaton.val(w)
if self.smooth == 1 and val <= 0:
Y[i] = self._trigramprobability(w, trigram_test)
trigram_index[i] = True
else:
Y[i] = val
i += 1
if self.smooth == 1:
self.trigram_index = trigram_index
return Y
def loss(self, X, y=None, normalize=True):
"""Log probability using the Spectral model
- Input:
:param SplearnArray X: of shape data shape = (n_samples, n_features)
Samples. X is validation data.
:param ndarray y: (default value = Null)
numpy array of shape [n_samples] Target values,
is the ground truth target for X (in the supervised case) or
None (in the unsupervised case)
:param boolean normalize: (default value = True) calculation are
performed and normalize by the number of sample in case of True
- Output:
:returns: mean of Log Probability corresponding to the input X
:rtype: float
"""
warnings.simplefilter("error", RuntimeWarning)
predict_prob = self.predict_proba(X)
if y is None:
try:
if normalize:
Y = np.mean(-np.log(predict_prob))
else:
Y = np.sum(-np.log(predict_prob))
except:
msg = "function loss or score use log " + \
"function, values can't be" + \
" negative, use it with smooth_method" + \
" to avoid such problem"
raise ValueError(msg)
return Y
else:
if normalize:
Y = np.mean((np.subtract(predict_prob, y) ** 2.0))
else:
Y = np.sum((np.subtract(predict_prob, y) ** 2.0))
return Y
def score(self, X, y=None, scoring="perplexity"):
"""score of the input target
- Input:
:param SplearnArray X: of shape data shape = (n_samples, n_features)
Samples.
:param ndarray y: (default value = None)
numpy array of shape [n_samples] Target values,
is the ground truth target for X (in the supervised case) or
None (in the unsupervised case)
:param string scoring: (default value = "perplexity")
method for score computation
- Output:
:returns: score, on the input X
:rtype: float
"""
if scoring == "perplexity":
if y is None:
return - self.loss(X, y, normalize=True)
else:
predict_prob = self.predict_proba(X)
sA, sC = 0, 0
sA = sum(predict_prob)
sC = sum(y)
s = 0
perplexity = 0
for i in range(X.shape[0]):
try:
s = s + y[i] / sC * math.log(predict_prob[i] / sA)
perplexity = math.exp(-s)
except:
msg = "function loss or score use log " + \
"function values can't be" + \
" negative, use it with smooth_method" + \
"to avoid such problem"
raise ValueError(msg)
return perplexity
else:
return - self.loss(X, y, normalize=True)