diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 5cf3ce2f8e9b6722edcc18c5f0d26ce3b24bca73..9557a7c07233af94deeed1fcf75d39f37d871f1c 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -40,7 +40,7 @@ parser = argparse.ArgumentParser( groupStandard = parser.add_argument_group('Standard arguments') groupStandard.add_argument('-log', action='store_true', help='Use option to activate Logging to Console') groupStandard.add_argument('--name', metavar='STRING', action='store', help='Name of Database (default: %(default)s)', - default='MultiOmic') + default='Plausible') groupStandard.add_argument('--type', metavar='STRING', action='store', help='Type of database : .hdf5 or .csv', default='.hdf5') groupStandard.add_argument('--views', metavar='STRING', action='store',help='Name of the views selected for learning', @@ -55,7 +55,7 @@ groupClass.add_argument('--CL_split', metavar='FLOAT', action='store', help='Determine the learning rate if > 1.0, number of fold for cross validation', type=float, default=0.7) groupClass.add_argument('--CL_nbFolds', metavar='INT', action='store', help='Number of folds in cross validation', - type=int, default=5 ) + type=int, default=2 ) groupClass.add_argument('--CL_nb_class', metavar='INT', action='store', help='Number of classes, -1 for all', type=int, default=2) groupClass.add_argument('--CL_classes', metavar='STRING', action='store', @@ -73,12 +73,12 @@ groupClass.add_argument('--CL_algos_multiview', metavar='STRING', action='store' groupClass.add_argument('--CL_cores', metavar='INT', action='store', help='Number of cores, -1 for all', type=int, default=1) groupClass.add_argument('--CL_statsiter', metavar='INT', action='store', help='Number of iteration for each algorithm to mean results', type=int, - default=1) + default=2) groupClass.add_argument('--CL_metrics', metavar='STRING', action='store', nargs="+", help='Determine which metrics to use, separate metric and configuration with ":". If multiple, separate with space. If no metric is specified, considering all with accuracy for classification ' 'first one will be used for classification', default=['']) groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', - help='Determine how many Randomized grid search tests to do', type=int, default=30) + help='Determine how many Randomized grid search tests to do', type=int, default=2) groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store', help='Determine which hyperparamter search function use', default="randomizedSearch") @@ -241,7 +241,7 @@ if args.CL_type.split(":")==["Benchmark"]: for fusionModulesName, fusionClasse in zip(fusionModulesNames, fusionClasses)) allMonoviewAlgos = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) - if (not isPackage) and (name!="SGD") and (name[:3]!="SVM") and (name!="SCM")] + if (not isPackage) and (name!="SGD") and (name[:3]!="SVM")] fusionMonoviewClassifiers = allMonoviewAlgos allFusionAlgos = {"Methods": fusionMethods, "Classifiers": fusionMonoviewClassifiers} allMumboAlgos = [name for _, name, isPackage in @@ -335,7 +335,7 @@ if nbCores>1: for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) accuracies = [[result[1][1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] - classifiersConfigs = [[result[1][2] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] + classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in range(NB_VIEW)] else: resultsMonoview+=([ExecMonoview(DATASET.get("View"+str(arguments["viewIndex"])), DATASET.get("Labels").value, args.name, labelsNames, @@ -348,27 +348,8 @@ else: classifiersNames = [[result[1][0] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] classifiersConfigs = [[result[1][1][:-1] for result in resultsMonoview if result[0]==viewIndex] for viewIndex in viewsIndices] monoviewTime = time.time()-dataBaseTime-start -print classifiersConfigs if True: if benchmark["Multiview"]: - try: - if benchmark["Multiview"]["Mumbo"]: - for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW): - mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] - arguments = {"CL_type": "Mumbo", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes.split(":")), - "LABELS_NAMES": args.CL_classes.split(":"), - "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, - "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]), - "threshold":args.MU_iter[2], - "classifiersConfigs": [argument.split(":") for argument in args.MU_config], "nbView":(len(viewsIndices))}} - argumentDictionaries["Multiview"].append(arguments) - except: - pass - try: if benchmark["Multiview"]["Fusion"]: if args.FU_cl_names.split(':') !=['']: @@ -456,9 +437,25 @@ if True: pass except: pass + try: + if benchmark["Multiview"]["Mumbo"]: + for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW): + mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] + arguments = {"CL_type": "Mumbo", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes.split(":")), + "LABELS_NAMES": args.CL_classes.split(":"), + "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, + "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]), + "threshold":args.MU_iter[2], + "classifiersConfigs": [argument.split(":") for argument in args.MU_config], "nbView":(len(viewsIndices))}} + argumentDictionaries["Multiview"].append(arguments) + except: + pass else: pass -# resultsMultiview = [] if nbCores>1: resultsMultiview = [] nbExperiments = len(argumentDictionaries["Multiview"]) diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 9919fbd67693fe6ad4fc0745cd9f9350cdd77843..8493434379fbd37e4ccf30fd94ee09efc5b6093f 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -38,7 +38,7 @@ def ExecMonoview_multicore(name, labelsNames, learningRate, nbFolds, datasetFile neededViewIndex = views.index(kwargs["feat"]) X = DATASET.get("View"+str(neededViewIndex)) Y = DATASET.get("Labels").value - return ExecMonoview(X, Y, name, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch, + return ExecMonoview(X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch, metrics=metrics, nIter=nIter, **args) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 430a71ccf94bc80f6d5ab3701ecb410cabf161e9..e1d2e0dece1ee581acb73365dbe0aafc61fc3ed7 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): num_estimators = int(kwargs['0']) base_estimators = DecisionTreeClassifier()#kwargs['1'] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 626099e1cfd9ef4236b359c6b8a112484e5fd260..f9cb2679a470bd3977d85d277ab1b90d450dbb3e 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs): maxDepth = int(kwargs['0']) classifier = DecisionTreeClassifier(max_depth=maxDepth) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index 323c4e7cf4fde8ce9f6f7543b7db381f867a6ee3..9105e37c1d5caa97bdfb5588f9e315f427373367 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): nNeighbors = int(kwargs['0']) classifier = KNeighborsClassifier(n_neighbors=nNeighbors) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 7037d6b6fe7e6ae42580c3982acf1928f9aec8cd..f0f24ee1b2dcdc87e550e1505d558b3dbb6abbfc 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -10,6 +10,9 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): num_estimators = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 2b28f22ef67f288356e671ed33b577c2a6d948fa..423d7f844204ee9a4fef07b6a766c7709e99b41e 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -18,6 +18,10 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype + +def canProbas(): + return False + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): max_attrtibutes = kwargs['0'] try: @@ -33,7 +37,6 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): binaryAttributes = kwargs["binaryAttributes"] except: attributeClassification, binaryAttributes, dsetFile, name = transformData(DATASET) - print kwargs classifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=max_attrtibutes, model_type=model_type, verbose=False) classifier.fit(binaryAttributes, CLASS_LABELS, X=None, attribute_classifications=attributeClassification, iteration_callback=None) try: diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index ef0d7b3b81e6973ddabce9ae841f9e479a807df3..59026a6628b1fff9551316f217daba5835b7a6b4 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): loss = kwargs['0'] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index 60ed82c215034256fa9d8bc04a6402b8604517db..4140f7a3b0631534dfb5899539ea04538c757211 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index c78492b3a90c68edc63d286e994986f108072e2b..7f48d2256c3ffbf5844b764dcea6d56d90471150 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index f5153ebb00f44692382d29b8f97dd38e8855ec85..9234a3a52ed22f18a562472de750a36034df2a46 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -10,6 +10,8 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def canProbas(): + return True def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 63341c292db742c843d9bf081a7bc3c384955ff9..63d870cf1c735413d86e424ee2f60eb2e1234536 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -10,8 +10,10 @@ import MonoviewClassifiers from utils.Dataset import getV -def fitMonoviewClassifier(classifierName, data, labels, classifierConfig): +def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas): monoviewClassifier = getattr(MonoviewClassifiers, classifierName) + if needProbas and not monoviewClassifier.canProbas(): + monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") classifier = monoviewClassifier.fit(data,labels,**dict((str(configIndex), config) for configIndex, config in enumerate(classifierConfig ))) @@ -28,6 +30,7 @@ class LateFusionClassifier(object): self.monoviewClassifiers = [] self.nbCores = NB_CORES self.accuracies = np.zeros(len(monoviewClassifiersNames)) + self.needProbas = False def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): @@ -38,5 +41,5 @@ class LateFusionClassifier(object): delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], getV(DATASET, viewIndex, trainIndices), DATASET.get("Labels")[trainIndices], - self.monoviewClassifiersConfigs[index]) + self.monoviewClassifiersConfigs[index], self.needProbas) for index, viewIndex in enumerate(viewsIndices)) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 36cf0015316fb90938519d5c45c81614a203799b..de6b0193eeb06171f543e608d5e82e8b4b7bb681 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -6,7 +6,6 @@ from utils.Dataset import getV def genParamsSets(classificationKWARGS, nIter=1): - print classificationKWARGS nbView = classificationKWARGS["nbView"] paramsSets = [] for _ in range(nIter): @@ -43,6 +42,7 @@ class BayesianInference(LateFusionClassifier): # self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.weights = None #A modifier !! + self.needProbas = True def setParams(self, paramsSet): self.weights = paramsSet[0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 6937fd10426bde6318413c6b7176d3dbc033b42b..5dcb33346e6a897c2e3e6b74513bf3739057ba3a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -46,6 +46,7 @@ class WeightedLinear(LateFusionClassifier): pass else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + self.needProbas = True def setParams(self, paramsSet): self.weights = paramsSet[0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py index 76b2af6aeb56528847538bd8d714cbebc9dafe45..546a1bf477fb0b3b5fbc5f023280772be48fe7e7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py +++ b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py @@ -480,7 +480,6 @@ def getAdjacenceMatrix(RNASeqRanking, sotredRNASeq, k=2): nbGenes = RNASeqRanking.shape[1] pointer = 0 for patientIndex in range(RNASeqRanking.shape[0]): - print patientIndex for i in range(nbGenes): for j in range(k/2): try: diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py index 98db895299fd86fa4b5a0e9d5a338bbf561709ef..6b63b874d6bd381b8d6ff393188efe4ab8a103ef 100644 --- a/Code/MonoMutliViewClassifiers/utils/Dataset.py +++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py @@ -37,10 +37,8 @@ def extractSubset(matrix, usedIndices): if sparse.issparse(matrix): newIndptr = np.zeros(len(usedIndices)+1, dtype=int) oldindptr = matrix.indptr - print oldindptr for exampleIndexIndex, exampleIndex in enumerate(usedIndices): newIndptr[exampleIndexIndex+1] = newIndptr[exampleIndexIndex]+(oldindptr[exampleIndex+1]-oldindptr[exampleIndex]) - print newIndptr newData = np.ones(newIndptr[-1], dtype=bool) newIndices = np.zeros(newIndptr[-1], dtype=int) oldIndices = matrix.indices diff --git a/multiview-machine-learning-omis.iml b/multiview-machine-learning-omis.iml index ad3c0a365c8cd79b6f3291a01ea24ccdc75c0de0..8021953ed9f8cc6cd6d71c79462bad4cd2b5394c 100644 --- a/multiview-machine-learning-omis.iml +++ b/multiview-machine-learning-omis.iml @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> +<module type="WEB_MODULE" version="4"> <component name="NewModuleRootManager" inherit-compiler-output="true"> <exclude-output /> <content url="file://$MODULE_DIR$" />