From dcffa946dc96db5cab6f7d00e862eb01487141d9 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@centrale-marseille.fr> Date: Tue, 4 Oct 2016 19:41:31 -0400 Subject: [PATCH] Debugged everything --- Code/MonoMutliViewClassifiers/ExecClassif.py | 49 +++++++++---------- .../Monoview/ExecClassifMonoView.py | 2 +- .../MonoviewClassifiers/Adaboost.py | 3 ++ .../MonoviewClassifiers/DecisionTree.py | 3 ++ .../MonoviewClassifiers/KNN.py | 3 ++ .../MonoviewClassifiers/RandomForest.py | 3 ++ .../MonoviewClassifiers/SCM.py | 5 +- .../MonoviewClassifiers/SGD.py | 2 + .../MonoviewClassifiers/SVMLinear.py | 2 + .../MonoviewClassifiers/SVMPoly.py | 2 + .../MonoviewClassifiers/SVMRBF.py | 2 + .../Multiview/Fusion/Methods/LateFusion.py | 7 ++- .../LateFusionPackage/BayesianInference.py | 2 +- .../LateFusionPackage/WeightedLinear.py | 1 + .../Multiview/GetMultiviewDb.py | 1 - .../MonoMutliViewClassifiers/utils/Dataset.py | 2 - multiview-machine-learning-omis.iml | 2 +- 17 files changed, 56 insertions(+), 35 deletions(-) diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 5cf3ce2f..9557a7c0 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -40,7 +40,7 @@ parser = argparse.ArgumentParser( groupStandard = parser.add_argument_group('Standard arguments') groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', - default='MultiOmic') + default='Plausible') groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv', default='.hdf5') groupStandard.add_argument('--views', metavar='STRING', action='store',help='Name of the views selected for learning', @@ -55,7 +55,7 @@ groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float, default=0.7) groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation', - type=int, default=5 ) + type=int, default=2 ) groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int, default=2) groupClass.add_argument('--CL_classes', metavar='STRING', action='store', @@ -73,12 +73,12 @@ groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store' groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=1) groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', help='Number of iteration for each algorithm to mean results', type=int, - default=1) + default=2) groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+", help='Determine which metrics to use, separate metric and configuration with ":". If multiple, separate with space. If no metric is specified, considering all with accuracy for classification ' 'first one will be used for classification', default=['']) groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', - help='Determine how many Randomized grid search tests to do', type=int, default=30) + help='Determine how many Randomized grid search tests to do', type=int, default=2) groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store', help='Determine which hyperparamter search function use', default="randomizedSearch") @@ -241,7 +241,7 @@ if args.CL_type.split(":")==["Benchmark"]: for fusionModulesName, fusionClasse in zip(fusionModulesNames, fusionClasses)) allMonoviewAlgos = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage) and (name!="SGD") and (name[:3]!="SVM") and (name!="SCM")] + if (not isPackage) and (name!="SGD") and (name[:3]!="SVM")] fusionMonoviewClassifiers = allMonoviewAlgos allFusionAlgos = {"Methods": fusionMethods, "Classifiers": fusionMonoviewClassifiers} allMumboAlgos = [name for _, name, isPackage in @@ -335,7 +335,7 @@ if nbCores>1: for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] - classifiersConfigs = [[result[1][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] + classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] else: resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])), DATASET.get("Labels").value, args.name, labelsNames, @@ -348,27 +348,8 @@ else: classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] monoviewTime = time.time()-dataBaseTime-start -print classifiersConfigs if True: if benchmark["Multiview"]: - try: - if benchmark["Multiview"]["Mumbo"]: - for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW): - mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] - arguments = {"CL_type": "Mumbo", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes.split(":")), - "LABELS_NAMES": args.CL_classes.split(":"), - "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, - "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]), - "threshold":args.MU_iter[2], - "classifiersConfigs": [argument.split(":") for argument in args.MU_config], "nbView":(len(viewsIndices))}} - argumentDictionaries["Multiview"].append(arguments) - except: - pass - try: if benchmark["Multiview"]["Fusion"]: if args.FU_cl_names.split(':') !=['']: @@ -456,9 +437,25 @@ if True: pass except: pass + try: + if benchmark["Multiview"]["Mumbo"]: + for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW): + mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] + arguments = {"CL_type": "Mumbo", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes.split(":")), + "LABELS_NAMES": args.CL_classes.split(":"), + "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, + "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]), + "threshold":args.MU_iter[2], + "classifiersConfigs": [argument.split(":") for argument in args.MU_config], "nbView":(len(viewsIndices))}} + argumentDictionaries["Multiview"].append(arguments) + except: + pass else: pass -# resultsMultiview = [] if nbCores>1: resultsMultiview = [] nbExperiments = len(argumentDictionaries["Multiview"]) diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 9919fbd6..84934343 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -38,7 +38,7 @@ def ExecMonoview_multicore(name, labelsNames, learningRate, nbFolds, datasetFile neededViewIndex = views.index(kwargs["feat"]) X = DATASET.get("View"+str(neededViewIndex)) Y = DATASET.get("Labels").value - return ExecMonoview(X, Y, name, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch, + return ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch, metrics=metrics, nIter=nIter, **args) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 430a71cc..e1d2e0de 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): num_estimators = int(kwargs['0']) base_estimators = DecisionTreeClassifier()#kwargs['1'] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 626099e1..f9cb2679 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs): maxDepth = int(kwargs['0']) classifier = DecisionTreeClassifier(max_depth=maxDepth) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index 323c4e7c..9105e37c 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): nNeighbors = int(kwargs['0']) classifier = KNeighborsClassifier(n_neighbors=nNeighbors) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 7037d6b6..f0f24ee1 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): num_estimators = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 2b28f22e..423d7f84 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -18,6 +18,10 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + +def canProbas(): + return False + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): max_attrtibutes = kwargs['0'] try: @@ -33,7 +37,6 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): binaryAttributes = kwargs["binaryAttributes"] except: attributeClassification, binaryAttributes, dsetFile, name = transformData(DATASET) - print kwargs classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attrtibutes, model_type=model_type, verbose=False) classifier.fit(binaryAttributes, CLASS_LABELS, X=None, attribute_classifications=attributeClassification, iteration_callback=None) try: diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index ef0d7b3b..59026a66 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): loss = kwargs['0'] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index 60ed82c2..4140f7a3 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index c78492b3..7f48d225 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index f5153ebb..9234a3a5 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 63341c29..63d870cf 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -10,8 +10,10 @@ import MonoviewClassifiers from utils.Dataset import getV -def fitMonoviewClassifier(classifierName, data, labels, classifierConfig): +def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas): monoviewClassifier = getattr(MonoviewClassifiers, classifierName) + if needProbas and not monoviewClassifier.canProbas(): + monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") classifier = monoviewClassifier.fit(data,labels,**dict((str(configIndex), config) for configIndex, config in enumerate(classifierConfig ))) @@ -28,6 +30,7 @@ class LateFusionClassifier(object): self.monoviewClassifiers = [] self.nbCores = NB_CORES self.accuracies = np.zeros(len(monoviewClassifiersNames)) + self.needProbas = False def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): @@ -38,5 +41,5 @@ class LateFusionClassifier(object): delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], getV(DATASET, viewIndex, trainIndices), DATASET.get("Labels")[trainIndices], - self.monoviewClassifiersConfigs[index]) + self.monoviewClassifiersConfigs[index], self.needProbas) for index, viewIndex in enumerate(viewsIndices)) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 36cf0015..de6b0193 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -6,7 +6,6 @@ from utils.Dataset import getV def genParamsSets(classificationKWARGS, nIter=1): - print classificationKWARGS nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): @@ -43,6 +42,7 @@ class BayesianInference(LateFusionClassifier): # self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.weights = None #A modifier !! + self.needProbas = True def setParams(self, paramsSet): self.weights = paramsSet[0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 6937fd10..5dcb3334 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -46,6 +46,7 @@ class WeightedLinear(LateFusionClassifier): pass else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + self.needProbas = True def setParams(self, paramsSet): self.weights = paramsSet[0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py index 76b2af6a..546a1bf4 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py +++ b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py @@ -480,7 +480,6 @@ def getAdjacenceMatrix(RNASeqRanking, sotredRNASeq, k=2): nbGenes = RNASeqRanking.shape[1] pointer = 0 for patientIndex in range(RNASeqRanking.shape[0]): - print patientIndex for i in range(nbGenes): for j in range(k/2): try: diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py index 98db8952..6b63b874 100644 --- a/Code/MonoMutliViewClassifiers/utils/Dataset.py +++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py @@ -37,10 +37,8 @@ def extractSubset(matrix, usedIndices): if sparse.issparse(matrix): newIndptr = np.zeros(len(usedIndices)+1, dtype=int) oldindptr = matrix.indptr - print oldindptr for exampleIndexIndex, exampleIndex in enumerate(usedIndices): newIndptr[exampleIndexIndex+1] = newIndptr[exampleIndexIndex]+(oldindptr[exampleIndex+1]-oldindptr[exampleIndex]) - print newIndptr newData = np.ones(newIndptr[-1], dtype=bool) newIndices = np.zeros(newIndptr[-1], dtype=int) oldIndices = matrix.indices diff --git a/multiview-machine-learning-omis.iml b/multiview-machine-learning-omis.iml index ad3c0a36..8021953e 100644 --- a/multiview-machine-learning-omis.iml +++ b/multiview-machine-learning-omis.iml @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> +<module type="WEB_MODULE" version="4"> <component name="NewModuleRootManager" inherit-compiler-output="true"> <exclude-output /> <content url="file://$MODULE_DIR$" /> -- GitLab