diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index cd066e7274fbda67eeffe406339c0decae445a55..a7d4eb653fa572477eb8b94c2d94b3e53ac44383 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -691,7 +691,7 @@ def execClassif(arguments): kFolds = execution.genKFolds(statsIter, args["Classification"]["nb_folds"], statsIterRandomStates) - datasetFiles = dataset.initMultipleDatasets(args["Base"]["pathf"], args["Base"]["name"], nbCores) + datasetFiles = dataset.init_multiple_datasets(args["Base"]["pathf"], args["Base"]["name"], nbCores) views, viewsIndices, allViews = execution.initViews(DATASET, args["Base"]["views"]) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 5745e67a992696783362632e4c868be410cdcd2b..bf1f766bc359b739a5c55bd903f58ac3ede9862b 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -16,7 +16,7 @@ from . import monoview_utils from .analyze_result import execute # Import own modules from .. 
import monoview_classifiers -from ..utils.dataset import getValue, extractSubset +from ..utils.dataset import get_value, extract_subset from ..utils import hyper_parameter_search # Author-Info @@ -146,7 +146,7 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): else: feat = X.attrs["name"] CL_type = kwargs["classifier_name"] - X = getValue(X) + X = get_value(X) learningRate = float(len(classificationIndices[0])) / ( len(classificationIndices[0]) + len(classificationIndices[1])) labelsString = "-".join(labelsNames) @@ -166,10 +166,10 @@ def initConstants(args, X, classificationIndices, labelsNames, name, directory): def initTrainTest(X, Y, classificationIndices): trainIndices, testIndices, testIndicesMulticlass = classificationIndices - X_train = extractSubset(X, trainIndices) - X_test = extractSubset(X, testIndices) + X_train = extract_subset(X, trainIndices) + X_test = extract_subset(X, testIndices) if np.array(testIndicesMulticlass).size != 0: - X_test_multiclass = extractSubset(X, testIndicesMulticlass) + X_test_multiclass = extract_subset(X, testIndicesMulticlass) else: X_test_multiclass = [] y_train = Y[trainIndices] @@ -319,7 +319,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # databaseType = None # # # Extract the data using MPI -# X, Y = dataset.getMonoviewShared(path, name, viewName) +# X, Y = dataset.get_monoview_shared(path, name, viewName) # # # Init log # logFileName = time.strftime( diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 02ba90af927f6fa24b101e88dc067d261850db6e..bcc9f201f61d1f26ccaa90d66196955bded3bc36 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -11,7 +11,7 @@ from .multiview_utils import MultiviewResult from . 
import analyze_results from .. import multiview_classifiers from ..utils import hyper_parameter_search -from ..utils.dataset import getShape +from ..utils.dataset import get_shape # Author-Info __author__ = "Baptiste Bauvin" @@ -38,7 +38,7 @@ def initConstants(kwargs, classificationIndices, metrics, name, nbCores, KFolds, for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( - getShape(DATASET, viewIndex))) + get_shape(DATASET, viewIndex))) return classifier_name, t_start, viewsIndices, classifier_config, views, learningRate diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py index 07a63a6f8e6380c6ff463a0359cd73a63625a34b..4bb84dc6daf14a9ee282f8f233018370b5d91c3d 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/EarlyFusion.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 import numpy as np -from ....utils.dataset import getV +from ....utils.dataset import get_v from .... 
import monoview_classifiers class EarlyFusionClassifier(object): @@ -38,5 +38,5 @@ class EarlyFusionClassifier(object): weights = np.array([1 / nbView for i in range(nbView)]) if sum(weights) != 1: weights = weights / sum(weights) - self.monoviewData = np.concatenate([getV(DATASET, viewIndex, usedIndices) + self.monoviewData = np.concatenate([get_v(DATASET, viewIndex, usedIndices) for index, viewIndex in enumerate(viewsIndices)], axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py index 8beb6a299404e32d8b1791da34cad45961f87668..314b11bb3b85b0fb570450fc93923997c47c58e9 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusion.py @@ -9,7 +9,7 @@ import math from .... import monoview_classifiers from .... 
import metrics -from ....utils.dataset import getV +from ....utils.dataset import get_v # def canProbasClassifier(classifierConfig): @@ -142,7 +142,7 @@ class LateFusionClassifier(object): trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( delayed(fitMonoviewClassifier)(self.monoviewClassifiers[index], - getV(DATASET, viewIndex, trainIndices), + get_v(DATASET, viewIndex, trainIndices), labels[trainIndices], self.needProbas, self.randomState) for index, viewIndex in enumerate(viewsIndices)) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py index 2f6d9b723fedf68094fe591b37b638fe81f729f5..61ec3838f238adeafa98a02471890e58eb1ccbd2 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/BayesianInference.py @@ -2,7 +2,7 @@ import numpy as np from sklearn.metrics import accuracy_score import pkgutil -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..... 
import monoview_classifiers from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig @@ -78,7 +78,7 @@ class BayesianInference(LateFusionClassifier): viewScores = []#np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): viewScores.append(np.power( - self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), + self.monoviewClassifiers[index].predict_proba(get_v(DATASET, viewIndex, usedIndices)), self.weights[index])) viewScores = np.array(viewScores) predictedLabels = np.argmax(np.prod(viewScores, axis=0), axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py index bef30f9cd687e389745a8b1f117718b84faeb2d4..bcdbfa8222d9f4aa79333dbf52132dff757da227 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -2,7 +2,7 @@ import numpy as np # from sklearn.metrics import accuracy_score # import pkgutil -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig from ..... 
import monoview_classifiers @@ -78,7 +78,7 @@ class MajorityVoting(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) for exampleIndex in range(datasetLength): for viewIndex, featureClassification in enumerate(monoViewDecisions[exampleIndex, :]): votes[exampleIndex, featureClassification] += self.weights[viewIndex] diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py index c5e7b9acab1504a506751722ebfbd13aa8637a9f..739ba0233224cd84057d66ec2b9442a72cbf69b2 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -9,7 +9,7 @@ import itertools from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig from ..... 
import monoview_classifiers -from .....utils.dataset import getV +from .....utils.dataset import get_v class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin): @@ -119,7 +119,7 @@ class SCMForLinear(LateFusionClassifier): if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) for index, viewIndex in enumerate(viewsIndices): - self.monoviewClassifiers[index].fit(getV(DATASET, viewIndex, trainIndices), + self.monoviewClassifiers[index].fit(get_v(DATASET, viewIndex, trainIndices), labels[trainIndices]) self.SCMForLinearFusionFit(DATASET, labels, usedIndices=trainIndices, viewsIndices=viewsIndices) @@ -133,7 +133,7 @@ class SCMForLinear(LateFusionClassifier): # accus = [] for index, viewIndex in enumerate(viewsIndices): monoviewDecision = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) # accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) monoviewDecisions[:, index] = monoviewDecision features = self.generateInteractions(monoviewDecisions) @@ -150,7 +150,7 @@ class SCMForLinear(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) features = self.generateInteractions(monoViewDecisions) features = np.array([np.array([feat for feat in feature]) for feature in features]) self.SCMClassifier.fit(features, labels[usedIndices].astype(int)) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py index 8b4c984039282d8aa49e91a6328663f0a48768e8..509256190aaa545ae1dbae70083c183ff24f4ec8 100644 --- 
a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -4,7 +4,7 @@ from sklearn.svm import SVC import pkgutil from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig -from .....utils.dataset import getV +from .....utils.dataset import get_v from ..... import monoview_classifiers @@ -63,7 +63,7 @@ class SVMForLinear(LateFusionClassifier): if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) for index, viewIndex in enumerate(viewsIndices): - self.monoviewClassifiers[index].fit(getV(DATASET, viewIndex, trainIndices), + self.monoviewClassifiers[index].fit(get_v(DATASET, viewIndex, trainIndices), labels[trainIndices]) self.SVMForLinearFusionFit(DATASET, labels, usedIndices=trainIndices, viewsIndices=viewsIndices) @@ -79,7 +79,7 @@ class SVMForLinear(LateFusionClassifier): monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) predictedLabels = self.SVMClassifier.predict(monoviewDecisions) return predictedLabels @@ -91,7 +91,7 @@ class SVMForLinear(LateFusionClassifier): monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) + get_v(DATASET, viewIndex, usedIndices)) self.SVMClassifier.fit(monoViewDecisions, labels[usedIndices]) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py 
b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py index d7d9f418bdaeccf190e9534e2ea407b71fdc1eef..baf9c56b9fab46b7702b0754da894a4e053e044f 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -4,7 +4,7 @@ import pkgutil from ..... import monoview_classifiers from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig -from .....utils.dataset import getV +from .....utils.dataset import get_v def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -76,7 +76,7 @@ class WeightedLinear(LateFusionClassifier): viewScores = []#np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) for index, viewIndex in enumerate(viewsIndices): viewScores.append(np.array(self.monoviewClassifiers[index].predict_proba( - getV(DATASET, viewIndex, usedIndices))) * self.weights[index]) + get_v(DATASET, viewIndex, usedIndices))) * self.weights[index]) viewScores = np.array(viewScores) predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py index f3d586fb424079f418525963737065397f42c1e0..c26387fb1057cb648ad97a8be10d0e54da1a5082 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/fusion/fusion.py @@ -11,7 +11,7 @@ except ValueError: import pdb;pdb.set_trace() from ... 
import monoview_classifiers -from ...utils.dataset import getV +from ...utils.dataset import get_v # Author-Info __author__ = "Baptiste Bauvin" @@ -111,7 +111,7 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= weights = np.array([1 / NB_VIEW for i in range(NB_VIEW)]) if sum(weights) != 1: weights = weights / sum(weights) - monoviewData = np.concatenate([weights[index] * getV(DATASET, viewIndex, usedIndices) + monoviewData = np.concatenate([weights[index] * get_v(DATASET, viewIndex, usedIndices) for index, viewIndex in enumerate(viewsIndices)], axis=1) return monoviewData @@ -140,7 +140,7 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # classifierModule = getattr(monoview_classifiers, classifierName) # classifierMethod = getattr(classifierModule, "hyperParamSearch") # if fusionTypeName == "LateFusion": -# bestSettings.append(classifierMethod(getV(DATASET, viewsIndices[classifierIndex], learningIndices), +# bestSettings.append(classifierMethod(get_v(DATASET, viewsIndices[classifierIndex], learningIndices), # DATASET.get("Labels")[learningIndices], metric=metric, # nIter=nIter)) # else: diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py index bb7fcbe09ccb9e93d3cf6c2dc147415e8a5ee88c..c21ee488974c8663fd863cacc1fe2572f03d59f3 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/weighted_linear_early_fusion.py @@ -1,7 +1,7 @@ import numpy as np import pkgutil -from ..utils.dataset import getV +from ..utils.dataset import get_v from ..multiview.multiview_utils import BaseMultiviewClassifier, get_train_views_indices from .. 
import monoview_classifiers @@ -64,7 +64,7 @@ class WeightedLinearEarlyFusion(BaseMultiviewClassifier): def hdf5_to_monoview(self, dataset, exmaples): """Here, we concatenate the views for the asked examples """ monoview_data = np.concatenate( - [getV(dataset, view_idx, exmaples) + [get_v(dataset, view_idx, exmaples) for view_weight, (index, view_idx) in zip(self.view_weights, enumerate(self.view_indices))] , axis=1) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py index 6a19674c653148ab554121ba41f8f31c66cf9904..9f38fa1f86c278709a3bb75bc8dde544f39e31fa 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/dataset.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/dataset.py @@ -10,86 +10,86 @@ from scipy import sparse from . import get_multiview_db as DB -def getV(DATASET, viewIndex, usedIndices=None): +def get_v(dataset, view_index, used_indices=None): """Used to extract a view as a numpy array or a sparse mat from the HDF5 dataset""" - if usedIndices is None: - usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if type(usedIndices) is int: - return DATASET.get("View" + str(viewIndex))[usedIndices, :] + if used_indices is None: + used_indices = range(dataset.get("Metadata").attrs["datasetLength"]) + if type(used_indices) is int: + return dataset.get("View" + str(view_index))[used_indices, :] else: - usedIndices = np.array(usedIndices) - sortedIndices = np.argsort(usedIndices) - usedIndices = usedIndices[sortedIndices] + used_indices = np.array(used_indices) + sorted_indices = np.argsort(used_indices) + used_indices = used_indices[sorted_indices] - if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: - return DATASET.get("View" + str(viewIndex))[usedIndices, :][ - np.argsort(sortedIndices), :] + if not dataset.get("View" + str(view_index)).attrs["sparse"]: + return dataset.get("View" + str(view_index))[used_indices, :][
+ np.argsort(sorted_indices), :] else: sparse_mat = sparse.csr_matrix( - (DATASET.get("View" + str(viewIndex)).get("data").value, - DATASET.get("View" + str(viewIndex)).get("indices").value, - DATASET.get("View" + str(viewIndex)).get("indptr").value), - shape=DATASET.get("View" + str(viewIndex)).attrs["shape"])[ - usedIndices, :][ - np.argsort(sortedIndices), :] + (dataset.get("View" + str(view_index)).get("data").value, + dataset.get("View" + str(view_index)).get("indices").value, + dataset.get("View" + str(view_index)).get("indptr").value), + shape=dataset.get("View" + str(view_index)).attrs["shape"])[ + used_indices, :][ + np.argsort(sorted_indices), :] return sparse_mat -def getShape(DATASET, viewIndex): +def get_shape(dataset, view_index): """Used to get the dataset shape even if it's sparse""" - if not DATASET.get("View" + str(viewIndex)).attrs["sparse"]: - return DATASET.get("View" + str(viewIndex)).shape + if not dataset.get("View" + str(view_index)).attrs["sparse"]: + return dataset.get("View" + str(view_index)).shape else: - return DATASET.get("View" + str(viewIndex)).attrs["shape"] + return dataset.get("View" + str(view_index)).attrs["shape"] -def getValue(DATASET): +def get_value(dataset): """Used to get the value of a view in the HDF5 dataset even if it sparse""" - if not DATASET.attrs["sparse"]: - return DATASET.value + if not dataset.attrs["sparse"]: + return dataset.value else: - sparse_mat = sparse.csr_matrix((DATASET.get("data").value, - DATASET.get("indices").value, - DATASET.get("indptr").value), - shape=DATASET.attrs["shape"]) + sparse_mat = sparse.csr_matrix((dataset.get("data").value, + dataset.get("indices").value, + dataset.get("indptr").value), + shape=dataset.attrs["shape"]) return sparse_mat -def extractSubset(matrix, usedIndices): +def extract_subset(matrix, used_indices): """Used to extract a subset of a matrix even if it's sparse""" if sparse.issparse(matrix): - newIndptr = np.zeros(len(usedIndices) + 1, dtype=int) + new_indptr = 
np.zeros(len(used_indices) + 1, dtype=int) oldindptr = matrix.indptr - for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndptr[exampleIndexIndex + 1] = newIndptr[exampleIndexIndex] + ( + for exampleIndexIndex, exampleIndex in enumerate(used_indices): + new_indptr[exampleIndexIndex + 1] = new_indptr[exampleIndexIndex] + ( oldindptr[exampleIndex + 1] - oldindptr[exampleIndex]) - newData = np.ones(newIndptr[-1], dtype=bool) - newIndices = np.zeros(newIndptr[-1], dtype=int) - oldIndices = matrix.indices - for exampleIndexIndex, exampleIndex in enumerate(usedIndices): - newIndices[newIndptr[exampleIndexIndex]:newIndptr[ - exampleIndexIndex + 1]] = oldIndices[ + new_data = np.ones(new_indptr[-1], dtype=bool) + new_indices = np.zeros(new_indptr[-1], dtype=int) + old_indices = matrix.indices + for exampleIndexIndex, exampleIndex in enumerate(used_indices): + new_indices[new_indptr[exampleIndexIndex]:new_indptr[ + exampleIndexIndex + 1]] = old_indices[ oldindptr[exampleIndex]: oldindptr[exampleIndex + 1]] - return sparse.csr_matrix((newData, newIndices, newIndptr), - shape=(len(usedIndices), matrix.shape[1])) + return sparse.csr_matrix((new_data, new_indices, new_indptr), + shape=(len(used_indices), matrix.shape[1])) else: - return matrix[usedIndices] + return matrix[used_indices] -def initMultipleDatasets(pathF, name, nbCores): +def init_multiple_datasets(path_f, name, nb_cores): r"""Used to create copies of the dataset if multicore computation is used. This is a temporary solution to fix the sharing memory issue with HDF5 datasets. Parameters ---------- - pathF : string + path_f : string Path to the original dataset directory name : string Name of the dataset - nbCores : int + nb_cores : int The number of threads that the benchmark can use Returns @@ -97,25 +97,25 @@ def initMultipleDatasets(pathF, name, nbCores): datasetFiles : None Dictionary resuming which mono- and multiview algorithms which will be used in the benchmark. 
""" - if nbCores > 1: - if DB.datasetsAlreadyExist(pathF, name, nbCores): + if nb_cores > 1: + if DB.datasetsAlreadyExist(path_f, name, nb_cores): logging.debug( "Info:\t Enough copies of the dataset are already available") pass else: logging.debug("Start:\t Creating " + str( - nbCores) + " temporary datasets for multiprocessing") + nb_cores) + " temporary datasets for multiprocessing") logging.warning( " WARNING : /!\ This may use a lot of HDD storage space : " + - str(os.path.getsize(pathF + name + ".hdf5") * nbCores / float( + str(os.path.getsize(path_f + name + ".hdf5") * nb_cores / float( 1024) / 1000 / 1000) + " Gbytes /!\ ") confirmation = confirm() if not confirmation: sys.exit(0) else: - datasetFiles = DB.copyHDF5(pathF, name, nbCores) + dataset_files = DB.copyHDF5(path_f, name, nb_cores) logging.debug("Start:\t Creating datasets for multiprocessing") - return datasetFiles + return dataset_files def confirm(resp=True, timeout=15): @@ -141,10 +141,9 @@ def input_(timeout=15): else: return "y" - -def getMonoviewShared(path, name, viewName, labelsNames, classificationIndices): +def get_monoview_shared(path, name, view_name, labels_names, classification_indices): """ATM is not used with shared memory, but soon :)""" - HDF5_dataset_file = h5py.File(path + name + ".hdf5", "w") - X = HDF5_dataset_file.get(viewName).value - Y = HDF5_dataset_file.get("Labels").value - return X, Y + hdf5_dataset_file = h5py.File(path + name + ".hdf5", "w") + X = hdf5_dataset_file.get(view_name).value + y = hdf5_dataset_file.get("Labels").value + return X, y