From 8a13f3f86757795b74ce075aec1163671f66ad69 Mon Sep 17 00:00:00 2001 From: bbauvin <baptiste.bauvin@centrale-marseille.fr> Date: Wed, 18 Oct 2017 17:32:53 -0400 Subject: [PATCH] Updated SCM version, now it seems to work but tweaked a bit on features --- Code/MonoMultiViewClassifiers/ExecClassif.py | 4 +- .../MonoviewClassifiers/SCM.py | 480 +++--------------- .../Multiview/Fusion/Methods/LateFusion.py | 23 +- .../LateFusionPackage/MajorityVoting.py | 3 + .../Methods/LateFusionPackage/SCMForLinear.py | 344 ++----------- .../Methods/LateFusionPackage/SVMForLinear.py | 11 +- .../LateFusionPackage/WeightedLinear.py | 3 + 7 files changed, 169 insertions(+), 699 deletions(-) diff --git a/Code/MonoMultiViewClassifiers/ExecClassif.py b/Code/MonoMultiViewClassifiers/ExecClassif.py index 86b6c6d6..caea0c6e 100644 --- a/Code/MonoMultiViewClassifiers/ExecClassif.py +++ b/Code/MonoMultiViewClassifiers/ExecClassif.py @@ -59,7 +59,7 @@ def initBenchmark(args): for multiviewPackageName in allMultiviewPackages: if multiviewPackageName in algosMutliview: multiviewPackage = getattr(Multiview, multiviewPackageName) - multiviewModule = getattr(multiviewPackage, multiviewPackageName) + multiviewModule = getattr(multiviewPackage, multiviewPackageName+"Module") benchmark = multiviewModule.getBenchmark(benchmark, args=args) if "Monoview" in args.CL_type: if args.CL_algos_monoview == ['']: @@ -272,7 +272,7 @@ def classifyOneIter(LABELS_DICTIONARY, argumentDictionaries, nbCores, directory, # __ EXECUTION __ # # _______________ # def execClassif(arguments): - import pdb;pdb.set_trace() + # import pdb;pdb.set_trace() testVersions() start = time.time() args = execution.parseTheArgs(arguments) diff --git a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py index 567b2eb3..9909564b 100644 --- a/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMultiViewClassifiers/MonoviewClassifiers/SCM.py @@ -1,8 +1,18 @@ -from pyscm.utils import _pack_binary_bytes_to_ints -import pyscm +# from pyscm.utils import _pack_binary_bytes_to_ints +# import pyscm import h5py # from pyscm.binary_attributes.base import BaseBinaryAttributeList import os +import itertools +# import pyscm.deprecated as pyscm +import numpy as np + +from pyscm.scm import SetCoveringMachineClassifier as scm +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.pipeline import Pipeline +from sklearn.model_selection import RandomizedSearchCV +from sklearn.externals.six import iteritems, iterkeys, itervalues +from scipy.stats import uniform, randint # from ..Multiview import GetMultiviewDb as DB # from ..utils.Dataset import getShape @@ -14,435 +24,107 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): + """docstring for SCM + A hands on class of SCM using decision stump, built with sklearn format in order to use sklearn function on SCM like + CV, gridsearch, and so on ...""" + + def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42): + super(DecisionStumpSCMNew, self).__init__() + self.model_type = model_type + self.p = p + self.max_rules = max_rules + self.random_state = random_state + + def fit(self, X, y): + self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state) + self.clf.fit(X=X, y=y) + + def predict(self, X): + return self.clf.predict(X) + + def set_params(self, **params): + 
for key, value in iteritems(params): + if key == 'p': + self.p = value + if key == 'model_type': + self.model_type = value + if key == 'max_rules': + self.max_rules = value + + def get_stats(self): + return {"Binary_attributes": self.clf.model_.rules} + + def canProbas(): return False def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): - max_attrtibutes = kwargs['0'] - try: - p = kwargs['1'] - except: - p = 1.0 - try: - model_type = kwargs['2'] - except: - model_type = "conjunction" - try: - attributeClassification = kwargs["attributeClassification"] - binaryAttributes = kwargs["binaryAttributes"] - except: - attributeClassification, binaryAttributes, dsetFile, name = transformData(DATASET) - classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attrtibutes, model_type=model_type, verbose=False) - classifier.fit(binaryAttributes, CLASS_LABELS, X=None, attribute_classifications=attributeClassification, - iteration_callback=None) - try: - dsetFile.close() - os.remove(name) - except: - pass + modelType = kwargs['0'] + maxRules = int(kwargs['1']) + p = float(kwargs["2"]) + classifier = DecisionStumpSCMNew(model_type=modelType, max_rules=maxRules, p=p, random_state=randomState) + classifier.fit(DATASET, CLASS_LABELS) return classifier def paramsToSet(nIter, randomState): paramsSet = [] for _ in range(nIter): - paramsSet.append([randomState.randint(1, 20), randomState.random_sample(), - randomState.choice(["conjunction", "disjunction"])]) + paramsSet.append([randomState.choice(["conjunction", "disjunction"]), randomState.randint(1, 15), randomState.random_sample()]) return paramsSet def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: - if kwargName == "CL_SCM_max_rules": - kwargsDict['0'] = int(kwargValue) - elif kwargName == "CL_SCM_p": + if kwargName == "CL_SCM_model_type": + kwargsDict['0'] = kwargValue + elif kwargName == "CL_SCM_max_rules": kwargsDict['1'] = int(kwargValue) - elif kwargName == "CL_SCM_model_type": - kwargsDict['2'] = kwargValue + elif kwargName == "CL_SCM_p": + kwargsDict['2'] = float(kwargValue) return kwargsDict -def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=None, metric=["accuracy_score", None], - nIter=30, nbCores=1): +def randomizedSearch(X_train, y_train, randomState, outputFileName, KFolds=4, metric=["accuracy_score", None], nIter=30, + nbCores=1): + pipeline = Pipeline([('classifier', DecisionStumpSCMNew())]) + + param = {"classifier__model_type": ['conjunction', 'disjunction'], + "classifier__p": uniform(), + "classifier__max_rules": randint(1,30)} metricModule = getattr(Metrics, metric[0]) if metric[1] is not None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - if metricModule.getConfig()[-14] == "h": - baseScore = -1000.0 - isBetter = "higher" - else: - baseScore = 1000.0 - isBetter = "lower" - config = [] - maxAttributesArray = [] - pArray = [] - modelsArray = [] - for iterIndex in range(nIter): - max_attributes = randomState.randint(1, 20) - maxAttributesArray.append(max_attributes) - p = randomState.random_sample() - pArray.append(p) - model = randomState.choice(["conjunction", "disjunction"]) - modelsArray.append(model) - classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attributes, model_type=model, verbose=False) - scores = [] - kFolds = KFolds.split(X_train, y_train) - for foldIdx, (trainIndices, testIndices) in enumerate(kFolds): - attributeClassification, binaryAttributes, 
dsetFile, name = transformData(X_train[trainIndices]) - try: - classifier.fit(binaryAttributes, y_train[trainIndices], X=None, - attribute_classifications=attributeClassification, iteration_callback=None) - - predictedLabels = classifier.predict(X_train[testIndices]) - score = metricModule.score(y_train[testIndices], predictedLabels) - scores.append(score) - except: - pass - dsetFile.close() - os.remove(name) - if scores == []: - score = baseScore - else: - score = np.mean(np.array(scores)) - - if isBetter == "higher" and score > baseScore: - baseScore = score - config = [max_attributes, p, model] - if isBetter == "lower" and score < baseScore: - baseScore = score - config = [max_attributes, p, model] - - assert config != [], "No good configuration found for SCM" - scoresArray = scores - params = [("maxAttributes", np.array(maxAttributesArray)), - ("p", np.array(pArray)), - ("model", np.array(modelsArray))] + scorer = metricModule.get_scorer(**metricKWARGS) + grid = RandomizedSearchCV(pipeline, n_iter=nIter, param_distributions=param, refit=True, n_jobs=nbCores, + scoring=scorer, cv=KFolds, random_state=randomState) + detector = grid.fit(X_train, y_train) + desc_estimators = [detector.best_params_["classifier__model_type"], + detector.best_params_["classifier__max_rules"], + detector.best_params_["classifier__p"]] + + scoresArray = detector.cv_results_['mean_test_score'] + params = [("model_type", np.array(detector.cv_results_['param_classifier__model_type'])), + ("maxRules", np.array(detector.cv_results_['param_classifier__max_rules'])), + ("p", np.array(detector.cv_results_['param_classifier__p']))] genHeatMaps(params, scoresArray, outputFileName) - return config + return desc_estimators def getConfig(config): if type(config) not in [list, dict]: - return "\n\t\t- SCM with max_attributes : " + str( - config.max_attributes) + ", model type : " + config.model_type + ", p : " + str(config.p) + return "\n\t\t- SCM with model_type: " + config.model_type + ", max_rules : " + str(config.max_rules) +\ + ", p : " + str(config.p) else: try: - return "\n\t\t- SCM with max_attributes : " + str(config[0]) + ", p : " + str( - config[1]) + ", model type : " + str(config[2]) + return "\n\t\t- SCM with model_type: " + config[0] + ", max_rules : " + str(config[1]) + ", p : " +\ + str(config[2]) except: - return "\n\t\t- SCM with max_attributes : " + str(config["0"]) + ", p : " + str( - config["1"]) + ", model type : " + str(config["2"]) - - -def transformData(dataArray): - dataArray = dataArray.astype(np.uint8) - if isBinary(dataArray): - nbExamples = dataArray.shape[0] - featureSequence = [str(featureIndex) for featureIndex in range(dataArray.shape[1])] - featureIndexByRule = np.arange(dataArray.shape[1], dtype=np.uint32) - binaryAttributes = LazyBaptisteRuleList(featureSequence, featureIndexByRule) - packedData = _pack_binary_bytes_to_ints(dataArray, 64) - del dataArray - nameb = "temp_scm" - if not os.path.isfile(nameb): - dsetFile = h5py.File(nameb, "w") - name = nameb - else: - fail = True - i = 0 - name = nameb - while fail: - if not os.path.isfile(name): - dsetFile = h5py.File(name, "w") - fail = False - else: - i += 1 - name = nameb + str(i) - - packedDataset = dsetFile.create_dataset("temp_scm", data=packedData) - dsetFile.close() - dsetFile = h5py.File(name, "r") - packedDataset = dsetFile.get("temp_scm") - attributeClassification = BaptisteRuleClassifications(packedDataset, nbExamples) - return attributeClassification, binaryAttributes, dsetFile, name - - -def isBinary(dataset): - if 
type(dataset[0, 0]) is np.uint8: - return True - for line in dataset: - for data in line: - if data != 0 or data != 1: - return False - return True - - -# !/usr/bin/env python -""" - Kover: Learn interpretable computational phenotyping models from k-merized genomic data - Copyright (C) 2015 Alexandre Drouin - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -""" - -import numpy as np - -from math import ceil - -from pyscm.binary_attributes.classifications.popcount import inplace_popcount_32, inplace_popcount_64 -from pyscm.utils import _unpack_binary_bytes_from_ints - - -def _minimum_uint_size(max_value): - """ - Find the minimum size unsigned integer type that can store values of at most max_value - From A.Drouin's Kover - """ - if max_value <= np.iinfo(np.uint8).max: - return np.uint8 - elif max_value <= np.iinfo(np.uint16).max: - return np.uint16 - elif max_value <= np.iinfo(np.uint32).max: - return np.uint32 - elif max_value <= np.iinfo(np.uint64).max: - return np.uint64 - else: - return np.uint128 - - -class BaptisteRule(object): - def __init__(self, feature_index, kmer_sequence, type): - """ - A k-mer rule - Parameters: - ----------- - feature_index: uint - The index of the k-mer - kmer_sequence: string - The nucleotide sequence of the k-mer - type: string - The type of rule: presence or absence (use p or a) - """ - self.feature_index = feature_index - self.kmer_sequence = kmer_sequence - self.type = type - - def classify(self, X): - if self.type == "absence": - return (X[:, self.feature_index] == 0).astype(np.uint8) - else: - return (X[:, self.feature_index] == 1).astype(np.uint8) - - def inverse(self): - return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, - type="absence" if self.type == "presence" else "presence") - - def __str__(self): - return ("Absence(" if self.type == "absence" else "Presence(") + self.kmer_sequence + ")" - - -class LazyBaptisteRuleList(object): - """ - By convention, the first half of the list contains presence rules and the second half contains the absence rules in - the same order. 
- """ - - def __init__(self, kmer_sequences, feature_index_by_rule): - self.n_rules = feature_index_by_rule.shape[0] * 2 - self.kmer_sequences = kmer_sequences - self.feature_index_by_rule = feature_index_by_rule - super(LazyBaptisteRuleList, self).__init__() - - def __getitem__(self, idx): - if idx >= self.n_rules: - raise ValueError("Index %d is out of range for list of size %d" % (idx, self.n_rules)) - if idx >= len(self.kmer_sequences): - type = "absence" - feature_idx = self.feature_index_by_rule[idx % len(self.kmer_sequences)] - else: - type = "presence" - feature_idx = self.feature_index_by_rule[idx] - return BaptisteRule(idx % len(self.kmer_sequences), self.kmer_sequences[feature_idx], type) - - def __len__(self): - return self.n_rules - - -class BaseRuleClassifications(object): - def __init__(self): - pass - - def get_columns(self, columns): - raise NotImplementedError() - - def remove_rows(self, rows): - raise NotImplementedError() - - @property - def shape(self): - raise NotImplementedError() - - def sum_rows(self, rows): - raise NotImplementedError() - - -class BaptisteRuleClassifications(BaseRuleClassifications): - """ - Methods involving columns account for presence and absence rules - """ - - # TODO: Clean up. Get rid of the code to handle deleted rows. We don't need this. - def __init__(self, dataset, n_rows, block_size=None): - self.dataset = dataset - self.dataset_initial_n_rows = n_rows - self.dataset_n_rows = n_rows - self.dataset_removed_rows = [] - self.dataset_removed_rows_mask = np.zeros(self.dataset_initial_n_rows, dtype=np.bool) - self.block_size = (None, None) - - if block_size is None: - if self.dataset.chunks is None: - self.block_size = (1, self.dataset.shape[1]) - else: - self.block_size = self.dataset.chunks - else: - if len(block_size) != 2 or not isinstance(block_size[0], int) or not isinstance(block_size[1], int): - raise ValueError("The block size must be a tuple of 2 integers.") - self.block_size = block_size - - # Get the size of the ints used to store the data - if self.dataset.dtype == np.uint32: - self.dataset_pack_size = 32 - self.inplace_popcount = inplace_popcount_32 - elif self.dataset.dtype == np.uint64: - self.dataset_pack_size = 64 - self.inplace_popcount = inplace_popcount_64 - else: - raise ValueError("Unsupported data type for packed attribute classifications array. The supported data" + - " types are np.uint32 and np.uint64.") - - super(BaseRuleClassifications, self).__init__() - - def get_columns(self, columns): - """ - Columns can be an integer (or any object that implements __index__) or a sorted list/ndarray. - """ - # TODO: Support slicing, make this more efficient than getting the columns individually. 
- columns_is_int = False - if hasattr(columns, "__index__"): # All int types implement the __index__ method (PEP 357) - columns = [columns.__index__()] - columns_is_int = True - elif isinstance(columns, np.ndarray): - columns = columns.tolist() - elif isinstance(columns, list): - pass - else: - columns = list(columns) - # Detect where an inversion is needed (columns corresponding to absence rules) - columns, invert_result = zip(*(((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), - (True if column > self.dataset.shape[1] else False)) for column in columns)) - columns = list(columns) - invert_result = np.array(invert_result) - - # Don't return rows that have been deleted - row_mask = np.ones(self.dataset.shape[0] * self.dataset_pack_size, dtype=np.bool) - row_mask[self.dataset_initial_n_rows:] = False - row_mask[self.dataset_removed_rows] = False - - # h5py requires that the column indices are sorted - unique, inverse = np.unique(columns, return_inverse=True) - result = _unpack_binary_bytes_from_ints(self.dataset[:, unique.tolist()])[row_mask] - result = result[:, inverse] - result[:, invert_result] = 1 - result[:, invert_result] - - if columns_is_int: - return result.reshape(-1) - else: - return result - - @property - def shape(self): - return self.dataset_n_rows, self.dataset.shape[1] * 2 - - # TODO: allow summing over multiple lists of rows at a time (saves i/o operations) - def sum_rows(self, rows): - """ - Note: Assumes that the rows argument does not contain duplicate elements. Rows will not be considered more than once. - """ - rows = np.asarray(rows) - result_dtype = _minimum_uint_size(rows.shape[0]) - result = np.zeros(self.dataset.shape[1] * 2, dtype=result_dtype) - - # Builds a mask to turn off the bits of the rows we do not want to count in the sum. - def build_row_mask(example_idx, n_examples, mask_n_bits): - if mask_n_bits not in [8, 16, 32, 64, 128]: - raise ValueError("Unsupported mask format. Use 8, 16, 32, 64 or 128 bits.") - - n_masks = int(ceil(float(n_examples) / mask_n_bits)) - masks = [0] * n_masks - - for idx in example_idx: - example_mask = idx / mask_n_bits - example_mask_idx = mask_n_bits - (idx - mask_n_bits * example_mask) - 1 - masks[example_mask] |= 1 << example_mask_idx - - return np.array(masks, dtype="u" + str(mask_n_bits / 8)) - - # Find the rows that occur in each dataset and their relative index - rows = np.sort(rows) - dataset_relative_rows = [] - for row_idx in rows: - # Find which row in the dataset corresponds to the requested row - # TODO: This is inefficient! Could exploit the fact that rows is sorted to reuse previous iterations. - current_idx = -1 - n_active_elements_seen = 0 - while n_active_elements_seen <= row_idx: - current_idx += 1 - if not self.dataset_removed_rows_mask[current_idx]: - n_active_elements_seen += 1 - dataset_relative_rows.append(current_idx) - - # Create a row mask for each dataset - row_mask = build_row_mask(dataset_relative_rows, self.dataset_initial_n_rows, self.dataset_pack_size) - del dataset_relative_rows - - # For each dataset load the rows for which the mask is not 0. 
Support column slicing aswell - n_col_blocks = int(ceil(1.0 * self.dataset.shape[1] / self.block_size[1])) - rows_to_load = np.where(row_mask != 0)[0] - n_row_blocks = int(ceil(1.0 * len(rows_to_load) / self.block_size[0])) - - for row_block in xrange(n_row_blocks): - block_row_mask = row_mask[rows_to_load[row_block * self.block_size[0]:(row_block + 1) * self.block_size[0]]] - - for col_block in xrange(n_col_blocks): - - # Load the appropriate rows/columns based on the block sizes - block = self.dataset[rows_to_load[row_block * self.block_size[0]:(row_block + 1) * self.block_size[0]], - col_block * self.block_size[1]:(col_block + 1) * self.block_size[1]] - - # Popcount - if len(block.shape) == 1: - block = block.reshape(1, -1) - self.inplace_popcount(block, block_row_mask) - - # Increment the sum - result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], - self.dataset.shape[1])] += np.sum(block, axis=0) - - # Compute the sum for absence rules - result[self.dataset.shape[1]:] = len(rows) - result[: self.dataset.shape[1]] - - return result + return "\n\t\t- SCM with model_type: " + config["0"] + ", max_rules : " + str(config["1"]) + ", p : " + \ + str(config["2"]) \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 5e970bd4..34b17cbc 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -28,10 +28,14 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr classifier = monoviewClassifier.fit(data, labels, randomState, DTConfig) return classifier else: + if type(classifierConfig) is dict: + pass + else: + classifierConfig = dict((str(configIndex), config) + for configIndex, config in enumerate(classifierConfig)) + classifier = monoviewClassifier.fit(data, labels, randomState, - **dict((str(configIndex), config) for configIndex, config in - enumerate(classifierConfig - ))) + **classifierConfig) return classifier @@ -68,6 +72,9 @@ def intersect(allClassifersNames, directory, viewsIndices, resultsMonoview, clas bestCombination = combination return [classifiersNames[viewIndex][index] for viewIndex, index in enumerate(bestCombination)] +def allMonoviewClassifiers(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): + return allClassifersNames + def bestScore(allClassifersNames, directory, viewsIndices, resultsMonoview, classificationIndices): nbViews = len(viewsIndices) @@ -133,8 +140,8 @@ class LateFusionClassifier(object): trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( - delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], - getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels").value[trainIndices], - self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState) - for index, viewIndex in enumerate(viewsIndices)) + delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], + getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels").value[trainIndices], + self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState) + for index, viewIndex in enumerate(viewsIndices)) diff --git a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py 
b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 77986387..966b0ca3 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -26,6 +26,9 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_names == [""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: classifiersConfigs = [ monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) diff --git a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index ad35ce1e..d4b6980c 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -1,10 +1,17 @@ import numpy as np import pyscm -from pyscm.utils import _pack_binary_bytes_to_ints +# from pyscm.utils import _pack_binary_bytes_to_ints import os import h5py -from pyscm.binary_attributes.classifications.popcount import inplace_popcount_32, inplace_popcount_64 -from pyscm.utils import _unpack_binary_bytes_from_ints +# from pyscm.binary_attributes.classifications.popcount import inplace_popcount_32, inplace_popcount_64 +# from pyscm.utils import _unpack_binary_bytes_from_ints + +from pyscm.scm import SetCoveringMachineClassifier as scm +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.pipeline import Pipeline +from sklearn.model_selection import RandomizedSearchCV +from sklearn.externals.six import iteritems, iterkeys, itervalues + from math import ceil import random from sklearn.metrics import accuracy_score @@ -15,6 +22,39 @@ from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig from ..... 
import MonoviewClassifiers from .....utils.Dataset import getV + +class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): + """docstring for SCM + A hands on class of SCM using decision stump, built with sklearn format in order to use sklearn function on SCM like + CV, gridsearch, and so on ...""" + + def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42): + super(DecisionStumpSCMNew, self).__init__() + self.model_type = model_type + self.p = p + self.max_rules = max_rules + self.random_state = random_state + + def fit(self, X, y): + self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state) + self.clf.fit(X=X, y=y) + + def predict(self, X): + return self.clf.predict(X) + + def set_params(self, **params): + for key, value in iteritems(params): + if key == 'p': + self.p = value + if key == 'model_type': + self.model_type = value + if key == 'max_rules': + self.max_rules = value + + def get_stats(self): + return {"Binary_attributes": self.clf.model_.rules} + + def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] paramsSets = [] @@ -36,6 +76,9 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_names == [""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: classifiersConfigs = [ monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) @@ -80,6 +123,7 @@ class SCMForLinear(LateFusionClassifier): self.p = paramsSet[0] self.maxAttributes = paramsSet[1] self.order = paramsSet[3] + self.order = 2 self.modelType = paramsSet[2] def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): @@ -118,55 +162,21 @@ class SCMForLinear(LateFusionClassifier): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=self.p, max_attributes=self.maxAttributes, - model_type=self.modelType, verbose=False) + self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.maxAttributes, model_type=self.modelType, + random_state=self.randomState) monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) features = self.generateInteractions(monoViewDecisions) - featureSequence = [str(index) for index in range(nbView)] - for orderIndex in range(self.order - 1): - featureSequence += [str(featureIndex) for featureIndex in - itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex + 2)] - featureIndexByRule = np.arange(features.shape[1], dtype=np.uint32) - binaryAttributes = LazyBaptisteRuleList(featureSequence, featureIndexByRule) - packedData = _pack_binary_bytes_to_ints(features, 64) - nameb = "temp_scm_fusion" - if not os.path.isfile(nameb): - dsetFile = h5py.File(nameb, "w") - name = nameb - else: - fail = True - i = 0 - name = nameb - while fail: - if not os.path.isfile(name): - dsetFile = h5py.File(name, "w") - fail = False - else: - i += 1 - name = nameb + str(i) - - packedDataset = 
dsetFile.create_dataset("temp_scm", data=packedData) - dsetFile.close() - dsetFile = h5py.File(name, "r") - packedDataset = dsetFile.get("temp_scm") - attributeClassification = BaptisteRuleClassifications(packedDataset, features.shape[0]) - self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels").value[usedIndices], - attribute_classifications=attributeClassification) - try: - dsetFile.close() - os.remove(name) - except: - pass + features = np.array([np.array([feat for feat in feature]) for feature in features]) + self.SCMClassifier.fit(features, DATASET.get("Labels").value[usedIndices].astype(int)) def generateInteractions(self, monoViewDecisions): if type(self.order) == type(None): - order = monoViewDecisions.shape[1] + self.order = monoViewDecisions.shape[1] if self.order == 1: return monoViewDecisions - else: genratedIntercations = [monoViewDecisions[:, i] for i in range(monoViewDecisions.shape[1])] for orderIndex in range(self.order - 1): @@ -181,256 +191,14 @@ class SCMForLinear(LateFusionClassifier): generatedDecision = np.logical_or(generatedDecision, monoViewDecisions[:, combin[index + 1]]) genratedIntercations.append(generatedDecision) - return np.transpose(np.array(genratedIntercations).astype(np.uint8)) + return np.transpose(np.array(genratedIntercations)) def getConfig(self, fusionMethodConfig, monoviewClassifiersNames, monoviewClassifiersConfigs): configString = "with SCM for linear with max_attributes : " + str(self.maxAttributes) + ", p : " + str(self.p) + \ - " model_type : " + str(self.modelType) + " has chosen " + \ - str(len(self.SCMClassifier.attribute_importances)) + " rule(s) \n\t-With monoview classifiers : " + " model_type : " + str(self.modelType) + " order : " + str(self.order)+ " has chosen " + \ + str(0.1) + " rule(s) \n\t-With monoview classifiers : " for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) - return configString - - -def _minimum_uint_size(max_value): - """ - Find the minimum size unsigned integer type that can store values of at most max_value - From A.Drouin's Kover - """ - if max_value <= np.iinfo(np.uint8).max: - return np.uint8 - elif max_value <= np.iinfo(np.uint16).max: - return np.uint16 - elif max_value <= np.iinfo(np.uint32).max: - return np.uint32 - elif max_value <= np.iinfo(np.uint64).max: - return np.uint64 - else: - return np.uint128 - - -class BaptisteRule(object): - def __init__(self, feature_index, kmer_sequence, type): - """ - A k-mer rule - Parameters: - ----------- - feature_index: uint - The index of the k-mer - kmer_sequence: string - The nucleotide sequence of the k-mer - type: string - The type of rule: presence or absence (use p or a) - """ - self.feature_index = feature_index - self.kmer_sequence = kmer_sequence - self.type = type - - def classify(self, X): - if self.type == "absence": - return (X[:, self.feature_index] == 0).astype(np.uint8) - else: - return (X[:, self.feature_index] == 1).astype(np.uint8) - - def inverse(self): - return BaptisteRule(feature_index=self.feature_index, kmer_sequence=self.kmer_sequence, - type="absence" if self.type == "presence" else "presence") - - def __str__(self): - return ("Absence(" if self.type == "absence" else "Presence(") + self.kmer_sequence + ")" - - -class LazyBaptisteRuleList(object): - """ - By convention, the first half of the list contains presence 
rules and the second half contains the absence rules in - the same order. - """ - - def __init__(self, kmer_sequences, feature_index_by_rule): - self.n_rules = feature_index_by_rule.shape[0] * 2 - self.kmer_sequences = kmer_sequences - self.feature_index_by_rule = feature_index_by_rule - super(LazyBaptisteRuleList, self).__init__() - - def __getitem__(self, idx): - if idx >= self.n_rules: - raise ValueError("Index %d is out of range for list of size %d" % (idx, self.n_rules)) - if idx >= len(self.kmer_sequences): - type = "absence" - feature_idx = self.feature_index_by_rule[idx % len(self.kmer_sequences)] - else: - type = "presence" - feature_idx = self.feature_index_by_rule[idx] - return BaptisteRule(idx % len(self.kmer_sequences), self.kmer_sequences[feature_idx], type) - - def __len__(self): - return self.n_rules - - -class BaseRuleClassifications(object): - def __init__(self): - pass - - def get_columns(self, columns): - raise NotImplementedError() - - def remove_rows(self, rows): - raise NotImplementedError() - - @property - def shape(self): - raise NotImplementedError() - - def sum_rows(self, rows): - raise NotImplementedError() - - -class BaptisteRuleClassifications(BaseRuleClassifications): - """ - Methods involving columns account for presence and absence rules - """ - - # TODO: Clean up. Get rid of the code to handle deleted rows. We don't need this. - def __init__(self, dataset, n_rows, block_size=None): - self.dataset = dataset - self.dataset_initial_n_rows = n_rows - self.dataset_n_rows = n_rows - self.dataset_removed_rows = [] - self.dataset_removed_rows_mask = np.zeros(self.dataset_initial_n_rows, dtype=np.bool) - self.block_size = (None, None) - - if block_size is None: - if self.dataset.chunks is None: - self.block_size = (1, self.dataset.shape[1]) - else: - self.block_size = self.dataset.chunks - else: - if len(block_size) != 2 or not isinstance(block_size[0], int) or not isinstance(block_size[1], int): - raise ValueError("The block size must be a tuple of 2 integers.") - self.block_size = block_size - - # Get the size of the ints used to store the data - if self.dataset.dtype == np.uint32: - self.dataset_pack_size = 32 - self.inplace_popcount = inplace_popcount_32 - elif self.dataset.dtype == np.uint64: - self.dataset_pack_size = 64 - self.inplace_popcount = inplace_popcount_64 - else: - raise ValueError("Unsupported data type for packed attribute classifications array. The supported data" + - " types are np.uint32 and np.uint64.") - - super(BaseRuleClassifications, self).__init__() - - def get_columns(self, columns): - """ - Columns can be an integer (or any object that implements __index__) or a sorted list/ndarray. - """ - # TODO: Support slicing, make this more efficient than getting the columns individually. 
- columns_is_int = False - if hasattr(columns, "__index__"): # All int types implement the __index__ method (PEP 357) - columns = [columns.__index__()] - columns_is_int = True - elif isinstance(columns, np.ndarray): - columns = columns.tolist() - elif isinstance(columns, list): - pass - else: - columns = list(columns) - # Detect where an inversion is needed (columns corresponding to absence rules) - columns, invert_result = zip(*(((column if column < self.dataset.shape[1] else column % self.dataset.shape[1]), - (True if column > self.dataset.shape[1] else False)) for column in columns)) - columns = list(columns) - invert_result = np.array(invert_result) - - # Don't return rows that have been deleted - row_mask = np.ones(self.dataset.shape[0] * self.dataset_pack_size, dtype=np.bool) - row_mask[self.dataset_initial_n_rows:] = False - row_mask[self.dataset_removed_rows] = False - - # h5py requires that the column indices are sorted - unique, inverse = np.unique(columns, return_inverse=True) - result = _unpack_binary_bytes_from_ints(self.dataset[:, unique.tolist()])[row_mask] - result = result[:, inverse] - result[:, invert_result] = 1 - result[:, invert_result] - - if columns_is_int: - return result.reshape(-1) - else: - return result - - @property - def shape(self): - return self.dataset_n_rows, self.dataset.shape[1] * 2 - - # TODO: allow summing over multiple lists of rows at a time (saves i/o operations) - def sum_rows(self, rows): - """ - Note: Assumes that the rows argument does not contain duplicate elements. Rows will not be considered more than once. - """ - rows = np.asarray(rows) - result_dtype = _minimum_uint_size(rows.shape[0]) - result = np.zeros(self.dataset.shape[1] * 2, dtype=result_dtype) - - # Builds a mask to turn off the bits of the rows we do not want to count in the sum. - def build_row_mask(example_idx, n_examples, mask_n_bits): - if mask_n_bits not in [8, 16, 32, 64, 128]: - raise ValueError("Unsupported mask format. Use 8, 16, 32, 64 or 128 bits.") - - n_masks = int(ceil(float(n_examples) / mask_n_bits)) - masks = [0] * n_masks - - for idx in example_idx: - example_mask = idx / mask_n_bits - example_mask_idx = mask_n_bits - (idx - mask_n_bits * example_mask) - 1 - masks[example_mask] |= 1 << example_mask_idx - - return np.array(masks, dtype="u" + str(mask_n_bits / 8)) - - # Find the rows that occur in each dataset and their relative index - rows = np.sort(rows) - dataset_relative_rows = [] - for row_idx in rows: - # Find which row in the dataset corresponds to the requested row - # TODO: This is inefficient! Could exploit the fact that rows is sorted to reuse previous iterations. - current_idx = -1 - n_active_elements_seen = 0 - while n_active_elements_seen <= row_idx: - current_idx += 1 - if not self.dataset_removed_rows_mask[current_idx]: - n_active_elements_seen += 1 - dataset_relative_rows.append(current_idx) - - # Create a row mask for each dataset - row_mask = build_row_mask(dataset_relative_rows, self.dataset_initial_n_rows, self.dataset_pack_size) - del dataset_relative_rows - - # For each dataset load the rows for which the mask is not 0. 
Support column slicing aswell - n_col_blocks = int(ceil(1.0 * self.dataset.shape[1] / self.block_size[1])) - rows_to_load = np.where(row_mask != 0)[0] - n_row_blocks = int(ceil(1.0 * len(rows_to_load) / self.block_size[0])) - - for row_block in xrange(n_row_blocks): - block_row_mask = row_mask[rows_to_load[row_block * self.block_size[0]:(row_block + 1) * self.block_size[0]]] - - for col_block in xrange(n_col_blocks): - - # Load the appropriate rows/columns based on the block sizes - block = self.dataset[rows_to_load[row_block * self.block_size[0]:(row_block + 1) * self.block_size[0]], - col_block * self.block_size[1]:(col_block + 1) * self.block_size[1]] - - # Popcount - if len(block.shape) == 1: - block = block.reshape(1, -1) - self.inplace_popcount(block, block_row_mask) - - # Increment the sum - result[col_block * self.block_size[1]:min((col_block + 1) * self.block_size[1], - self.dataset.shape[1])] += np.sum(block, axis=0) - - # Compute the sum for absence rules - result[self.dataset.shape[1]:] = len(rows) - result[: self.dataset.shape[1]] - - return result + return configString \ No newline at end of file diff --git a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index 8259b53c..b4b8ef04 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -24,6 +24,9 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_names == [""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: classifiersConfigs = [ monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) @@ -62,12 +65,16 @@ class SVMForLinear(LateFusionClassifier): if type(self.monoviewClassifiersConfigs[0]) == dict: for index, viewIndex in enumerate(viewsIndices): monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) + if type(self.monoviewClassifiersConfigs[index]) is dict: + pass + else: + self.monoviewClassifiersConfigs[index] = dict((str(configIndex), config) + for configIndex, config in enumerate(self.monoviewClassifiersConfigs[index])) self.monoviewClassifiers.append( monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), DATASET.get("Labels").value[trainIndices], self.randomState, NB_CORES=self.nbCores, - **dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfigs[index])))) + **self.monoviewClassifiersConfigs[index])) else: self.monoviewClassifiers = self.monoviewClassifiersConfigs self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) diff --git a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 24de1714..f46aa43a 100644 --- a/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMultiViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py 
@@ -26,6 +26,9 @@ def getArgs(benchmark, args, views, viewsIndices, directory, resultsMonoview, cl viewsIndices, resultsMonoview, classificationIndices) monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_names == [""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") if args.FU_L_cl_config != ['']: classifiersConfigs = [ monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) -- GitLab
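
As an illustration (not part of the patch itself): a minimal sketch of how the sklearn-style SCM wrapper introduced in SCM.py is meant to be used, i.e. wrapped in a Pipeline and tuned with RandomizedSearchCV over model_type, p and max_rules, mirroring the new randomizedSearch(). The class name DecisionStumpSCM, the toy data and the __main__ driver are hypothetical stand-ins; the pyscm API it calls (pyscm.scm.SetCoveringMachineClassifier with model_type, p, max_rules, random_state) is the one imported by the patch, and the sketch assumes pyscm and a recent scikit-learn are installed.

    import numpy as np
    from scipy.stats import randint, uniform
    from sklearn.base import BaseEstimator, ClassifierMixin
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.pipeline import Pipeline
    from pyscm.scm import SetCoveringMachineClassifier


    class DecisionStumpSCM(BaseEstimator, ClassifierMixin):
        """Thin sklearn-compatible wrapper around pyscm's decision-stump SCM."""

        def __init__(self, model_type="conjunction", p=0.1, max_rules=10, random_state=42):
            # Store constructor arguments under the same names so that
            # BaseEstimator's get_params/set_params (and sklearn's clone) work.
            self.model_type = model_type
            self.p = p
            self.max_rules = max_rules
            self.random_state = random_state

        def fit(self, X, y):
            # Delegate training to pyscm; returning self follows the sklearn
            # convention (the patch's wrapper omits the return, which Pipeline tolerates).
            self.clf_ = SetCoveringMachineClassifier(
                model_type=self.model_type, p=self.p,
                max_rules=self.max_rules, random_state=self.random_state)
            self.clf_.fit(X, y)
            return self

        def predict(self, X):
            return self.clf_.predict(X)


    if __name__ == "__main__":
        rng = np.random.RandomState(42)
        X = rng.randint(0, 2, size=(200, 30))   # toy binary attribute matrix
        y = rng.randint(0, 2, size=200)         # toy binary labels

        pipeline = Pipeline([("classifier", DecisionStumpSCM())])
        param_distributions = {
            "classifier__model_type": ["conjunction", "disjunction"],
            "classifier__p": uniform(),          # p sampled uniformly in [0, 1)
            "classifier__max_rules": randint(1, 30),
        }
        search = RandomizedSearchCV(pipeline, param_distributions, n_iter=10,
                                    cv=4, random_state=42, n_jobs=1)
        search.fit(X, y)
        print(search.best_params_)

Unlike the patch's DecisionStumpSCMNew, this sketch returns self from fit and relies on BaseEstimator's automatic get_params/set_params instead of a hand-written set_params, so the manual iteritems loop over parameters is not needed.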