diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 9b40f4b55777d867ae686df555b8285db43b1142..ddf891d5d0d48cfc139be7ab7c5115aa29a4e338 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -216,14 +216,13 @@ def lateFusionSetArgs(views, viewsIndices, classes, method, return arguments -def initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, classifiersNames, - NB_VIEW, metrics, argumentDictionaries, randomState, directory): +def initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview): multiviewArguments = [] if "Multiview" in benchmark: for multiviewAlgoName in benchmark["Multiview"]: multiviewPackage = getattr(Multiview, multiviewAlgoName) mutliviewModule = getattr(multiviewPackage, multiviewAlgoName) - multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory) + multiviewArguments += mutliviewModule.getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview) argumentDictionaries["Multiview"] = multiviewArguments return argumentDictionaries @@ -503,9 +502,7 @@ else: viewsIndices] monoviewTime = time.time() - dataBaseTime - start -argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, - classifiersNames, NB_VIEW, metrics[0], argumentDictionaries, randomState, - directory) +argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, argumentDictionaries, randomState, directory, resultsMonoview) if nbCores > 1: resultsMultiview = [] diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 79b349b1d74e9c6cd4d6850b8ae7173de218ff4b..a22e478f22d491f18ab5e96b7839a9daf63852df 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -55,12 +55,12 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol nbClass = kwargs["nbClass"] X = getValue(X) datasetLength = X.shape[0] + learningRate = len(classificationIndices[0])/(len(classificationIndices[0])+len(classificationIndices[1])) logging.debug("Done:\t Loading data") # Determine the Database to extract features - print KFolds - logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train_size:" - + str(len(classificationIndices[0])) + ", CrossValidation k-folds: " + str(KFolds.n_splits) + ", cores:" + logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train ratio:" + + str(learningRate) + ", CrossValidation k-folds: " + str(KFolds.n_splits) + ", cores:" + str(nbCores) + ", algorithm : " + CL_type) # y_trains = [] @@ -123,7 +123,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol timestr = time.strftime("%Y%m%d-%H%M%S") CL_type_string = CL_type outputFileName = directory + "/"+CL_type_string+"/"+"/"+feat+"/"+timestr +"Results-" + CL_type_string + "-" + labelsString + \ - '-learnRate' + str(len(classificationIndices)) + '-' + name + "-" + feat + "-" + '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-" if not os.path.exists(os.path.dirname(outputFileName)): try: os.makedirs(os.path.dirname(outputFileName)) @@ -151,7 +151,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol logging.info("Done:\t Result Analysis") viewIndex = args["viewIndex"] - return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels, y_train_pred] + return viewIndex, [CL_type, cl_desc+[feat], metricsScores, full_labels, cl_res] # # Classification Report with Precision, Recall, F1 , Support # logging.debug("Info:\t Classification report:") diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 13ebfa6c84cf1fd534790037fe9e6770e75e9e75..67dab09e3aee185d145a277cb69982187be510c5 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -22,6 +22,13 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 15), DecisionTreeClassifier()]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -52,7 +59,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, metric=["accuracy_ def getConfig(config): - try : - return "\n\t\t- Adaboost with num_esimators : "+str(config[0])+", base_estimators : "+str(config[1]) - except: - return "\n\t\t- Adaboost with num_esimators : "+str(config["0"])+", base_estimators : "+str(config["1"]) \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- Adaboost with num_esimators : "+str(config.n_estimators)+", base_estimators : "+str(config.base_estimator) + else: + try: + return "\n\t\t- Adaboost with num_esimators : "+str(config[0])+", base_estimators : "+str(config[1]) + except: + return "\n\t\t- Adaboost with num_esimators : "+str(config["0"])+", base_estimators : "+str(config["1"]) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 5700bd0aea81552b1f017008169398be7c06712f..67acba97c36d25f6e59f1a3148d4680752d27596 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -23,6 +23,13 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1, **kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 300), randomState.choice(["gini", "entropy"]), randomState.choice(["best", "random"])]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -37,7 +44,7 @@ def getKWARGS(kwargsList): def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())]) - param_DT = {"classifier__max_depth": randint(1, 30), + param_DT = {"classifier__max_depth": randint(1, 300), "classifier__criterion": ["gini", "entropy"], "classifier__splitter": ["best", "random"]} metricModule = getattr(Metrics, metric[0]) @@ -55,7 +62,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- Decision Tree with max_depth : "+str(config[0]) + ", criterion : "+config[1]+", splitter : "+config[2] - except: - return "\n\t\t- Decision Tree with max_depth : "+str(config["0"]) + ", criterion : "+config["1"]+", splitter : "+config["2"] \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- Decision Tree with max_depth : "+str(config.max_depth) + ", criterion : "+config.criterion+", splitter : "+config.splitter + else: + try: + return "\n\t\t- Decision Tree with max_depth : "+str(config[0]) + ", criterion : "+config[1]+", splitter : "+config[2] + except: + return "\n\t\t- Decision Tree with max_depth : "+str(config["0"]) + ", criterion : "+config["1"]+", splitter : "+config["2"] \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index a3c39983262e8c54c72b3099640c877ea5d03b15..c8947a0a2cbb70a9feafdf108984385dc69f7c42 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -25,6 +25,14 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 50), randomState.choice(["uniform", "distance"]), + randomState.choice(["auto", "ball_tree", "kd_tree", "brute"]), randomState.choice([1,2])]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -64,7 +72,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config[0])+", weights : "+config[1]+", algorithm : "+config[2]+", p : "+str(config[3]) - except: - return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config["0"])+", weights : "+config["1"]+", algorithm : "+config["2"]+", p : "+str(config["3"]) \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config.n_neighbors)+", weights : "+config.weights+", algorithm : "+config.algorithm+", p : "+str(config.p) + else: + try: + return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config[0])+", weights : "+config[1]+", algorithm : "+config[2]+", p : "+str(config[3]) + except: + return "\n\t\t- K nearest Neighbors with n_neighbors : "+str(config["0"])+", weights : "+config["1"]+", algorithm : "+config["2"]+", p : "+str(config["3"]) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 76ad16b92c9a089a6be40dfcebc02aeae31a1abe..9452465eebb9f6dc0668bf1a2c6334cd9f2355c2 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -23,6 +23,14 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 300), randomState.randint(1, 300), + randomState.choice(["gini", "entropy"])]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -37,8 +45,8 @@ def getKWARGS(kwargsList): def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_rf = Pipeline([('classifier', RandomForestClassifier())]) - param_rf = {"classifier__n_estimators": randint(1, 30), - "classifier__max_depth": randint(1, 30), + param_rf = {"classifier__n_estimators": randint(1, 300), + "classifier__max_depth": randint(1, 300), "classifier__criterion": ["gini", "entropy"]} metricModule = getattr(Metrics, metric[0]) if metric[1]!=None: @@ -57,7 +65,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- Random Forest with num_esimators : "+str(config[0])+", max_depth : "+str(config[1])+ ", criterion : "+config[2] - except: - return "\n\t\t- Random Forest with num_esimators : "+str(config["0"])+", max_depth : "+str(config["1"])+ ", criterion : "+config["2"] + if type(config) not in [list, dict]: + return "\n\t\t- Random Forest with num_esimators : "+str(config.n_estimators)+", max_depth : "+str(config.max_depth)+ ", criterion : "+config.criterion + else: + try: + return "\n\t\t- Random Forest with num_esimators : "+str(config[0])+", max_depth : "+str(config[1])+ ", criterion : "+config[2] + except: + return "\n\t\t- Random Forest with num_esimators : "+str(config["0"])+", max_depth : "+str(config["1"])+ ", criterion : "+config["2"] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 065ee1b0a9dbd1b9366f2dc5f4763128897c3474..99706836c43406f08140a6aaa598c1f092a3dc26 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -13,10 +13,10 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype - def canProbas(): return False + def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): max_attrtibutes = kwargs['0'] try: @@ -42,6 +42,15 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): pass return classifier + +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 20), randomState.random_sample(), + randomState.choice(["conjunction", "disjunction"])]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -78,8 +87,8 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=None, metric=["accura # else: # kFolds = [[], range(len(y_train))] scores = [] - KFolds = KFolds.split(X_train, y_train) - for foldIdx, (trainIndices, testIndices) in enumerate(KFolds): + kFolds = KFolds.split(X_train, y_train) + for foldIdx, (trainIndices, testIndices) in enumerate(kFolds): # if fold != range(len(y_train)): # fold.sort() # trainIndices = [index for index in range(len(y_train)) if (index not in fold)] @@ -112,10 +121,13 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=None, metric=["accura def getConfig(config): - try : - return "\n\t\t- SCM with max_attributes : "+str(config[0])#+", c : "+str(config[1])+", p : "+str(config[2]) - except: - return "\n\t\t- SCM with max_attributes : "+str(config["0"])#+", c : "+str(config["1"])+", p : "+str(config["2"]) + if type(config) not in [list, dict]: + return "\n\t\t- SCM with max_attributes : "+str(config.max_attributes)+", model type : "+config.model_type+", p : "+str(config.p) + else: + try : + return "\n\t\t- SCM with max_attributes : "+str(config[0])+", p : "+str(config[1])+", model type : "+str(config[2]) + except: + return "\n\t\t- SCM with max_attributes : "+str(config["0"])+", p : "+str(config["1"])+", model type : "+str(config["2"]) def transformData(dataArray): diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index f936eff0da29661bc2be5c15ff05ead9c9c18993..d531e8e7a74ea43ea9e28b7424f1f35e5a68c201 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -27,6 +27,14 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.choice(['log', 'modified_huber']), + randomState.choice(["l1", "l2", "elasticnet"]), randomState.random_sample()]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -59,8 +67,12 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= SGD_detector.best_params_["classifier__alpha"]] return desc_params + def getConfig(config): - try: - return "\n\t\t- SGDClassifier with loss : "+config[0]+", penalty : "+config[1] - except: - return "\n\t\t- SGDClassifier with loss : "+config["0"]+", penalty : "+config["1"] \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- SGDClassifier with loss : "+config.loss+", penalty : "+config.penalty+", alpha : "+str(config.alpha) + else: + try: + return "\n\t\t- SGDClassifier with loss : "+config[0]+", penalty : "+config[1]+", alpha : "+str(config[2]) + except: + return "\n\t\t- SGDClassifier with loss : "+config["0"]+", penalty : "+config["1"]+", alpha : "+str(config["2"]) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index 28d010c15ef5b62fa5ee7efde37c5a5a3007d61c..f9dfb3319a3e400eb297b14d52e76860b6ee8d84 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -21,6 +21,13 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 10000),]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -48,7 +55,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- SVM Linear with C : "+str(config[0]) - except: - return "\n\t\t- SVM Linear with C : "+str(config["0"]) + if type(config) not in [list, dict]: + return "\n\t\t- SVM Linear with C : "+str(config.C) + else: + try: + return "\n\t\t- SVM Linear with C : "+str(config[0]) + except: + return "\n\t\t- SVM Linear with C : "+str(config["0"]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index 9f5ec44cd7bb213435dc09ea8623bb2322548115..961e579104b7e365c35b4517cf39fb20dd00d23d 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -23,6 +23,13 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 10000), randomState.randint(1, 30)]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -51,7 +58,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- SVM Poly with C : "+str(config[0]) - except: - return "\n\t\t- SVM Poly with C : "+str(config["0"]) \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- SVM Poly with C : "+str(config.C)+", degree : "+str(config.degree) + else: + try: + return "\n\t\t- SVM Poly with C : "+str(config[0])+", degree : "+str(config[1]) + except: + return "\n\t\t- SVM Poly with C : "+str(config["0"])+", degree : "+str(config["1"]) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index 230074683608dcbc6bb4c9008e1085b86eb41c6b..1ce9881c7444f4c74082e388b6f6b7c2cc903780 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -21,6 +21,13 @@ def fit(DATASET, CLASS_LABELS, randomState, NB_CORES=1,**kwargs): return classifier +def paramsToSet(nIter, randomState): + paramsSet = [] + for _ in range(nIter): + paramsSet.append([randomState.randint(1, 10000),]) + return paramsSet + + def getKWARGS(kwargsList): kwargsDict = {} for (kwargName, kwargValue) in kwargsList: @@ -46,7 +53,10 @@ def randomizedSearch(X_train, y_train, randomState, KFolds=4, nbCores=1, metric= def getConfig(config): - try: - return "\n\t\t- SVM RBF with C : "+str(config[0]) - except: - return "\n\t\t- SVM RBF with C : "+str(config["0"]) \ No newline at end of file + if type(config) not in [list, dict]: + return "\n\t\t- SVM RBF with C : "+str(config.C) + else: + try: + return "\n\t\t- SVM RBF with C : "+str(config[0]) + except: + return "\n\t\t- SVM RBF with C : "+str(config["0"]) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 0aaa77b248802c13871e386b333c7eea36c81ab4..7e5a37dc3a290cacf7587c2fa9621b0d4bb71aa3 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -1,10 +1,12 @@ import sys import os.path +import errno sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from Multiview import * +# from Multiview import * +import Multiview import GetMultiviewDb as DB import os @@ -26,7 +28,7 @@ def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, d hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) -def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, +def ExecMultiview(directory, DATASET, name, classificationIndices, KFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, randomState, hyperParamSearch=False, metrics=None, nIter=30, **kwargs): datasetLength = DATASET.get("Metadata").attrs["datasetLength"] @@ -40,11 +42,12 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data CL_type = kwargs["CL_type"] LABELS_NAMES = kwargs["LABELS_NAMES"] classificationKWARGS = kwargs[CL_type+"KWARGS"] - + learningRate = len(classificationIndices[0])/(len(classificationIndices[0])+len(classificationIndices[1])) t_start = time.time() logging.info("### Main Programm for Multiview Classification") logging.info("### Classification - Database : " + str(name) + " ; Views : " + ", ".join(views) + - " ; Algorithm : " + CL_type + " ; Cores : " + str(nbCores)) + " ; Algorithm : " + CL_type + " ; Cores : " + str(nbCores)+", Train ratio : " + str(learningRate)+ + ", CV on " + str(KFolds.n_splits) + " folds") for viewIndex, viewName in zip(viewsIndices, views): logging.info("Info:\t Shape of " + str(viewName) + " :" + str( @@ -52,27 +55,27 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data logging.info("Done:\t Read Database Files") extractionTime = time.time() - t_start - ivalidationIndices = [] - - trainLabelsIterations = [] - testLabelsIterations = [] - classifiersIterations = [] - classifierPackage = globals()[CL_type] # Permet d'appeler un module avec une string + # ivalidationIndices = [] + learningIndices, validationIndices = classificationIndices + # trainLabelsIterations = [] + # testLabelsIterations = [] + # classifiersIterations = [] + classifierPackage = getattr(Multiview, CL_type) # Permet d'appeler un module avec une string classifierModule = getattr(classifierPackage, CL_type) classifierClass = getattr(classifierModule, CL_type) analysisModule = getattr(classifierPackage, "analyzeResults") - logging.info("Start:\t Determine validation split for ratio " + str(learningRate)) - iValidationIndices = [DB.splitDataset(DATASET, learningRate, datasetLength, randomState) for _ in range(statsIter)] - iLearningIndices = [[index for index in range(datasetLength) if index not in validationIndices] for validationIndices in iValidationIndices] - iClassificationSetLength = [len(learningIndices) for learningIndices in iLearningIndices] - logging.info("Done:\t Determine validation split") + logging.info("Train ratio : " + str(learningRate)) + # iValidationIndices = [DB.splitDataset(DATASET, classificationIndices, datasetLength, randomState) for _ in range(statsIter)] + # iLearningIndices = [[index for index in range(datasetLength) if index not in validationIndices] for validationIndices in iValidationIndices] + # iClassificationSetLength = [len(learningIndices) for learningIndices in iLearningIndices] + # logging.info("Done:\t Determine validation split") - logging.info("Start:\t Determine "+str(nbFolds)+" folds") - if nbFolds != 1: - iKFolds = [DB.getKFoldIndices(nbFolds, DATASET.get("Labels")[...], NB_CLASS, learningIndices, randomState) for learningIndices in iLearningIndices] - else: - iKFolds = [[[], range(classificationSetLength)] for classificationSetLength in iClassificationSetLength] + logging.info("CV On " + str(KFolds.n_splits) + " folds") + # if KFolds != 1: + # iKFolds = [DB.getKFoldIndices(KFolds, DATASET.get("Labels")[...], NB_CLASS, learningIndices, randomState) for learningIndices in iLearningIndices] + # else: + # iKFolds = [[[], range(classificationSetLength)] for classificationSetLength in iClassificationSetLength] # logging.info("Info:\t Length of Learning Sets: " + str(classificationSetLength - len(kFolds[0]))) # logging.info("Info:\t Length of Testing Sets: " + str(len(kFolds[0]))) @@ -84,20 +87,19 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data # logging.info("Start:\t Classification") # Begin Classification if hyperParamSearch != "None": - classifier = searchBestSettings(DATASET, CL_type, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) + classifier = searchBestSettings(DATASET, CL_type, metrics, learningIndices, KFolds, randomState, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) else: classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) - for _ in range(statsIter): - learningIndices, validationIndices = learningRate - classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) - trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) - fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices) - trainLabelsIterations.append(trainLabels) - testLabelsIterations.append(testLabels) - ivalidationIndices.append(validationIndices) - classifiersIterations.append(classifier) - logging.info("Done:\t Classification") + + classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) + trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) + testLabels = classifier.predict_hdf5(DATASET, usedIndices=validationIndices, viewsIndices=viewsIndices) + fullLabels = classifier.predict_hdf5(DATASET, viewsIndices=viewsIndices) + # trainLabelsIterations.append(trainLabels) + # testLabelsIterations.append(testLabels) + # ivalidationIndices.append(validationIndices) + # classifiersIterations.append(classifier) + logging.info("Done:\t Classification") classificationTime = time.time() - t_start @@ -106,11 +108,11 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data times = (extractionTime, classificationTime) - stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(classifiersIterations, trainLabelsIterations, - testLabelsIterations, DATASET, - classificationKWARGS, learningRate, + stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(classifier, trainLabels, + testLabels, DATASET, + classificationKWARGS, classificationIndices, LABELS_DICTIONARY, views, nbCores, times, - name, nbFolds, ivalidationIndices, + name, KFolds, hyperParamSearch, nIter, metrics, statsIter, viewsIndices, randomState) labelsSet = set(LABELS_DICTIONARY.values()) @@ -118,14 +120,19 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data featureString = "-".join(views) labelsString = "-".join(labelsSet) timestr = time.strftime("%Y%m%d-%H%M%S") - CL_type_string = CL_type - if CL_type=="Fusion": - CL_type_string += "-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+"-".join(classificationKWARGS["classifiersNames"]) - elif CL_type=="Mumbo": - CL_type_string += "-"+"-".join(classificationKWARGS["classifiersNames"]) - outputFileName = directory + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \ + CL_type_string = classifierModule.getCLString(classificationKWARGS) + # if CL_type=="Fusion": + # CL_type_string += "-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+"-".join(classificationKWARGS["classifiersNames"]) + # elif CL_type=="Mumbo": + # CL_type_string += "-"+"-".join(classificationKWARGS["classifiersNames"]) + outputFileName = directory + "/" + CL_type_string + "/" + timestr + "Results-" + CL_type_string + "-" + featureString + '-' + labelsString + \ '-learnRate' + str(learningRate) + '-' + name - + if not os.path.exists(os.path.dirname(outputFileName)): + try: + os.makedirs(os.path.dirname(outputFileName)) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise outputTextFile = open(outputFileName + '.txt', 'w') outputTextFile.write(stringAnalysis) outputTextFile.close() diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 4e29363691e18d8cadefc64f89f3d18f1bf58c12..6bfb7a3f209e4a9063b93a674a10a5a654d27f29 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -5,6 +5,7 @@ import pkgutil # from Methods import * import Methods import MonoviewClassifiers + from utils.Dataset import getV @@ -47,7 +48,7 @@ def getBenchmark(benchmark, args=None): if not isPackage] else: benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] = args.FU_late_methods - if "EarlyFusion" in args.FU_types: + if "EarlyFusion" in benchmark["Multiview"]["Fusion"]["Methods"]: if args.FU_early_methods == [""]: benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"] = [name for _, name, isPackage in pkgutil.iter_modules([ @@ -66,16 +67,15 @@ def getBenchmark(benchmark, args=None): return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory): +def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview): if not "Monoview" in benchmark and not args.FU_L_select_monoview in ["randomClf", "Determined"]: args.FU_L_select_monoview = "randomClf" argumentsList = [] for fusionType in benchmark["Multiview"]["Fusion"]["Methods"]: - # import pdb;pdb.set_trace() fusionTypePackage = getattr(Methods, fusionType+"Package") for fusionMethod in benchmark["Multiview"]["Fusion"]["Methods"][fusionType]: fusionMethodModule = getattr(fusionTypePackage, fusionMethod) - arguments = fusionMethodModule.getArgs(args, views, viewsIndices, directory) + arguments = fusionMethodModule.getArgs(args, views, viewsIndices, directory, resultsMonoview) argumentsList+= arguments return argumentsList @@ -84,7 +84,7 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if not usedIndices: - uesdIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) NB_VIEW = len(viewsIndices) if weights==None: weights = np.array([1/NB_VIEW for i in range(NB_VIEW)]) @@ -97,7 +97,7 @@ def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices= def genParamsSets(classificationKWARGS, randomState, nIter=1): fusionTypeName = classificationKWARGS["fusionType"] - fusionTypePackage = globals()[fusionTypeName+"Package"] + fusionTypePackage = getattr(Methods, fusionTypeName+"Package") fusionMethodModuleName = classificationKWARGS["fusionMethod"] fusionMethodModule = getattr(fusionTypePackage, fusionMethodModuleName) fusionMethodConfig = fusionMethodModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) @@ -133,11 +133,19 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices return bestSettings, fusionMethodConfig +def getCLString(classificationKWARGS): + if classificationKWARGS["fusionType"] == "LateFusion": + return "Fusion-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+\ + "-".join(classificationKWARGS["classifiersNames"]) + elif classificationKWARGS["fusionType"] == "EarlyFusion": + return "Fusion-"+classificationKWARGS["fusionType"]+"-"+classificationKWARGS["fusionMethod"]+"-"+ \ + classificationKWARGS["classifiersNames"] + class Fusion: def __init__(self, randomState, NB_CORES=1, **kwargs): fusionType = kwargs['fusionType'] fusionMethod = kwargs['fusionMethod'] - fusionTypePackage = globals()[fusionType+"Package"] + fusionTypePackage = getattr(Methods, fusionType+"Package") fusionMethodModule = getattr(fusionTypePackage, fusionMethod) fusionMethodClass = getattr(fusionMethodModule, fusionMethod) nbCores = NB_CORES @@ -164,10 +172,7 @@ class Fusion: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if usedIndices: - predictedLabels = self.classifier.predict_hdf5(DATASET, usedIndices=usedIndices, viewsIndices=viewsIndices) - else: - predictedLabels = [] + predictedLabels = self.classifier.predict_hdf5(DATASET, usedIndices=usedIndices, viewsIndices=viewsIndices) return predictedLabels def predict_probas_hdf5(self, DATASET, usedIndices=None): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index 1ba7f1b8eb187534234aaead7a375764508ff1b5..23bf64879b3698943e562ca85fca129abcbd4e9e 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -10,6 +10,8 @@ class EarlyFusionClassifier(object): self.monoviewClassifierName = monoviewClassifierName if type(monoviewClassifierConfig)==dict: pass + elif monoviewClassifierConfig is None: + pass else: monoviewClassifierConfig = dict((str(configIndex), config[0]) for configIndex, config in enumerate(monoviewClassifierConfig @@ -24,7 +26,7 @@ class EarlyFusionClassifier(object): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) - if not usedIndices: + if usedIndices is None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(weights)== type(None): weights = np.array([1/nbView for i in range(nbView)]) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index b4158ebc06513ef67de0cbbe387dc1d35bfc97f0..e72b0e3e9dbaa2aef6e550009f102357d14810c1 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -1,37 +1,62 @@ from ...Methods.EarlyFusion import EarlyFusionClassifier import MonoviewClassifiers import numpy as np +import pkgutil from sklearn.metrics import accuracy_score def genParamsSets(classificationKWARGS, randomState, nIter=1): nbView = classificationKWARGS["nbView"] + if classificationKWARGS["classifiersConfigs"] is None: + monoviewClassifierModule = getattr(MonoviewClassifiers, classificationKWARGS["classifiersNames"]) + paramsMonoview = monoviewClassifierModule.paramsToSet(nIter, randomState) paramsSets = [] - for _ in range(nIter): + for iterIndex in range(nIter): randomWeightsArray = randomState.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) - paramsSets.append([normalizedArray]) + paramsSets.append([normalizedArray, paramsMonoview[iterIndex]]) return paramsSets -def getArgs(args, views, viewsIndices, directory): +def getArgs(args, views, viewsIndices, directory, resultsMonoview): argumentsList = [] + if args.FU_E_cl_names != ['']: + pass + else: + monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) + if (not isPackage)] + args.FU_E_cl_names = monoviewClassifierModulesNames + args.FU_E_cl_config = [None for _ in monoviewClassifierModulesNames] for classifierName, classifierConfig in zip(args.FU_E_cl_names, args.FU_E_cl_config): monoviewClassifierModule = getattr(MonoviewClassifiers, classifierName) - arguments = {"CL_type": "Fusion", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "FusionKWARGS": {"fusionType": "EarlyFusion", - "fusionMethod": "WeightedLinear", - "classifiersNames": classifierName, - "classifiersConfigs": monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(",")]), - 'fusionMethodConfig': args.FU_E_method_configs, - "nbView": (len(viewsIndices))}} + if classifierConfig is not None: + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "EarlyFusion", + "fusionMethod": "WeightedLinear", + "classifiersNames": classifierName, + "classifiersConfigs": monoviewClassifierModule.getKWARGS([arg.split(":") + for arg in + classifierConfig.split(",")]), + 'fusionMethodConfig': args.FU_E_method_configs, + "nbView": (len(viewsIndices))}} + else: + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "EarlyFusion", + "fusionMethod": "WeightedLinear", + "classifiersNames": classifierName, + "classifiersConfigs": None, + 'fusionMethodConfig': args.FU_E_method_configs, + "nbView": (len(viewsIndices))}} argumentsList.append(arguments) return argumentsList @@ -60,9 +85,9 @@ class WeightedLinear(EarlyFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): EarlyFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None: + if kwargs['fusionMethodConfig']==None: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) - elif kwargs['fusionMethodConfig'][0]==['']: + elif kwargs['fusionMethodConfig']==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) @@ -70,17 +95,18 @@ class WeightedLinear(EarlyFusionClassifier): def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if not trainIndices: + if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) self.weights = self.weights/float(max(self.weights)) self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=trainIndices, viewsIndices=viewsIndices) monoviewClassifierModule = getattr(MonoviewClassifiers, self.monoviewClassifierName) - self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("Labels")[trainIndices], + self.monoviewClassifier = monoviewClassifierModule.fit(self.monoviewData, DATASET.get("Labels").value[trainIndices], self.randomState, NB_CORES=self.nbCores, #**self.monoviewClassifiersConfig) **self.monoviewClassifiersConfig) def setParams(self, paramsSet): self.weights = paramsSet[0] + self.monoviewClassifiersConfig = dict((str(index), param) for index, param in enumerate(paramsSet[1])) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): @@ -88,28 +114,23 @@ class WeightedLinear(EarlyFusionClassifier): self.weights = self.weights/float(np.sum(self.weights)) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices, viewsIndices=viewsIndices) - predictedLabels = self.monoviewClassifier.predict(self.monoviewData) - else: - predictedLabels=[] + self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices, viewsIndices=viewsIndices) + predictedLabels = self.monoviewClassifier.predict(self.monoviewData) + return predictedLabels def predict_proba_hdf5(self, DATASET, usedIndices=None): if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices) - predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData) - else: - predictedLabels=[] + self.makeMonoviewData_hdf5(DATASET, weights=self.weights, usedIndices=usedIndices) + predictedLabels = self.monoviewClassifier.predict_proba(self.monoviewData) return predictedLabels def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs): configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+ \ " with monoview classifier : " monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames) - configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs) + configString += monoviewClassifierModule.getConfig(self.monoviewClassifiersConfig) return configString def gridSearch(self, classificationKWARGS): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 28407fedbf07e43cb75c636cc12190b6e70af037..fe204796f9afe84a8b02cbde144a74efd94523f9 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -7,44 +7,68 @@ from joblib import Parallel, delayed # from sklearn.multiclass import OneVsOneClassifier # from sklearn.svm import SVC import os +import sys import MonoviewClassifiers import Metrics from utils.Dataset import getV -def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas): - monoviewClassifier = getattr(MonoviewClassifiers, classifierName) - if needProbas and not monoviewClassifier.canProbas(): - monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") - classifier = monoviewClassifier.fit(data,labels,**dict((str(configIndex), config) for configIndex, config in - enumerate(classifierConfig - ))) - return classifier +def canProbasClassifier(classifierConfig): + try: + _ = getattr(classifierConfig, "predict_proba") + return True + except AttributeError: + return False + + +def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needProbas, randomState): + if type(classifierConfig[0])==dict: + monoviewClassifier = getattr(MonoviewClassifiers, classifierName) + if needProbas and not monoviewClassifier.canProbas(): + monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") + DTConfig = {"0":300, "1":"entropy", "2":"random"} + classifier = monoviewClassifier.fit(data,labels, randomState,DTConfig) + return classifier + else: + classifier = monoviewClassifier.fit(data,labels, randomState,**dict((str(configIndex), config) for configIndex, config in + enumerate(classifierConfig + ))) + return classifier + else: + if needProbas and not canProbasClassifier(classifierConfig): + monoviewClassifier = getattr(MonoviewClassifiers, "DecisionTree") + DTConfig = {"0":300, "1":"entropy", "2":"random"} + classifier = monoviewClassifier.fit(data,labels, randomState,DTConfig) + return classifier + else: + return classifierConfig + -def getAccuracies(LateFusionClassifiers): +def getScores(LateFusionClassifiers): return "" def intersect(allClassifersNames, directory, viewsIndices): - wrongSets = [] + wrongSets = [0 for _ in allClassifersNames] nbViews = len(viewsIndices) for classifierIndex, classifierName in enumerate(allClassifersNames): - wrongSets[classifierIndex]=[] classifierDirectory = directory+"/"+classifierName+"/" - for viewIndex, viewDirectory in enumerate(os.listdir(classifierDirectory)): - for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectory+"/"): + viewDirectoryNames = os.listdir(classifierDirectory) + wrongSets[classifierIndex]=[0 for _ in viewDirectoryNames] + for viewIndex, viewDirectoryName in enumerate(viewDirectoryNames): + for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectoryName+"/"): if resultFileName.endswith("train_labels.csv"): - yTrainFileName = classifierDirectory+"/"+viewDirectory+"/"+resultFileName + yTrainFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName elif resultFileName.endswith("train_pred.csv"): - yTrainPredFileName = classifierDirectory+"/"+viewDirectory+"/"+resultFileName + yTrainPredFileName = classifierDirectory+"/"+viewDirectoryName+"/"+resultFileName train = np.genfromtxt(yTrainFileName, delimiter=",").astype(np.int16) pred = np.genfromtxt(yTrainPredFileName, delimiter=",").astype(np.int16) length = len(train) wrongLabelsIndices = np.where(train+pred == 1) wrongSets[classifierIndex][viewIndex]=wrongLabelsIndices - combinations = itertools.combinations_with_replacement(range(nbViews), len(allClassifersNames)) + combinations = itertools.combinations_with_replacement(range(len(allClassifersNames)), nbViews) bestLen = length bestCombination = None for combination in combinations: @@ -91,26 +115,31 @@ def bestScore(allClassifersNames, directory, viewsIndices): def getClassifiers(selectionMethodName, allClassifiersNames, directory, viewsIndices): - selectionMethod = locals()[selectionMethodName] + thismodule = sys.modules[__name__] + selectionMethod = getattr(thismodule, selectionMethodName) classifiersNames = selectionMethod(allClassifiersNames, directory, viewsIndices) return classifiersNames -# def getConfig(classifiersNames, directory): -# for classifierIndex, classifierName in classifiersNames: -# classifierDirectory = directory+"/"+classifierName+"/" -# viewName = os.listdir(classifierDirectory)[classifierIndex] -# viewDirectory = classifierDirectory+"/"+viewName+"/" -# for resultFileName in os.listdir(classifierDirectory+"/"+viewDirectory+"/"): -# if resultFileName.endswith(".txt"): -# pass +def getConfig(classifiersNames, resultsMonoview): + classifiers = [0 for _ in range(len(classifiersNames))] + for viewIndex, classifierName in enumerate(classifiersNames): + for resultMonoview in resultsMonoview: + if resultMonoview[0]==viewIndex and resultMonoview[1][0]==classifierName: + classifiers[viewIndex]=resultMonoview[1][4] + return classifiers +def jambon(fromage): + pass class LateFusionClassifier(object): def __init__(self, randomState, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1): self.monoviewClassifiersNames = monoviewClassifiersNames - self.monoviewClassifiersConfigs = monoviewClassifiersConfigs - self.monoviewClassifiers = [] + if type(monoviewClassifiersConfigs[0])==dict: + self.monoviewClassifiersConfigs = monoviewClassifiersConfigs + self.monoviewClassifiers = [] + else: + self.monoviewClassifiersConfigs = monoviewClassifiersConfigs self.nbCores = NB_CORES self.accuracies = np.zeros(len(monoviewClassifiersNames)) self.needProbas = False @@ -122,11 +151,17 @@ class LateFusionClassifier(object): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices == None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - monoviewSelectionMethod = locals()[self.monoviewSelection] - self.monoviewClassifiers = monoviewSelectionMethod() + # monoviewSelectionMethod = locals()[self.monoviewSelection] + # self.monoviewClassifiers = monoviewSelectionMethod() + # a = Parallel(n_jobs=self.nbCores)( + # delayed(jambon)(DATASET.get("Labels").value[trainIndices], + # ) + # for index, viewIndex in enumerate(viewsIndices)) + # import pdb;pdb.set_trace() + self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( - delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], - getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels")[trainIndices], - self.monoviewClassifiersConfigs[index], self.needProbas) - for index, viewIndex in enumerate(viewsIndices)) \ No newline at end of file + delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], + getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels").value[trainIndices], + self.monoviewClassifiersConfigs[index], self.needProbas, self.randomState) + for index, viewIndex in enumerate(viewsIndices)) \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 25c987dd517f975d25568dc4d141748baa2faf58..f0df43a70b700a09bc414cdde8ab355dbc4e2a46 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -4,7 +4,7 @@ import pkgutil from utils.Dataset import getV import MonoviewClassifiers -from ..LateFusion import LateFusionClassifier, getClassifiers#, getConfig +from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -25,9 +25,9 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # fusionMethodConfig = args.FU_method_config # return classifiersNames, classifiersConfig, fusionMethodConfig -def getArgs(args, views, viewsIndices, directory): +def getArgs(args, views, viewsIndices, directory, resultsMonoview): if args.FU_L_cl_names!=['']: - pass + args.FU_L_select_monoview = "user_defined" else: monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) if (not isPackage)] @@ -35,11 +35,14 @@ def getArgs(args, views, viewsIndices, directory): monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] if args.FU_L_cl_config != ['']: - classifierConfig = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) for monoviewClassifierModule,classifierConfig in zip(monoviewClassifierModules,args.FU_L_cl_config)] else: - # args.FU_L_cl_config = getConfig(args.FU_L_cl_names, directory) + classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) + if args.FU_L_cl_names==[""] and args.CL_type == ["Multiview"]: + raise AttributeError("You must perform Monoview classification or specify " + "which monoview classifier to use Late Fusion") arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -49,11 +52,7 @@ def getArgs(args, views, viewsIndices, directory): "FusionKWARGS": {"fusionType": "LateFusion", "fusionMethod": "BayesianInference", "classifiersNames": args.FU_L_cl_names, - "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(",")]) - for monoviewClassifierModule,classifierConfig - in zip(monoviewClassifierModules,args.FU_L_cl_config)], + "classifiersConfigs": classifiersConfigs, 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, "nbView": (len(viewsIndices))}} @@ -89,7 +88,7 @@ class BayesianInference(LateFusionClassifier): if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: self.weights = [1.0 for classifier in kwargs['classifiersNames']] else: - self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) + self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) self.needProbas = True def setParams(self, paramsSet): @@ -104,15 +103,12 @@ class BayesianInference(LateFusionClassifier): usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if sum(self.weights)!=1.0: self.weights = self.weights/sum(self.weights) - if usedIndices: - viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) - for index, viewIndex in enumerate(viewsIndices): - viewScores[index] = np.power(self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), - self.weights[index]) - predictedLabels = np.argmax(np.prod(viewScores, axis=0), axis=1) - else: - predictedLabels = [] + viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) + for index, viewIndex in enumerate(viewsIndices): + viewScores[index] = np.power(self.monoviewClassifiers[index].predict_proba(getV(DATASET, viewIndex, usedIndices)), + self.weights[index]) + predictedLabels = np.argmax(np.prod(viewScores, axis=0), axis=1) return predictedLabels def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): @@ -121,4 +117,5 @@ class BayesianInference(LateFusionClassifier): for monoviewClassifierConfig, monoviewClassifierName in zip(monoviewClassifiersConfigs, monoviewClassifiersNames): monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifierName) configString += monoviewClassifierModule.getConfig(monoviewClassifierConfig) + configString+="\n\t -Method used to select monoview classifiers : "+self.monoviewSelection return configString \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 797bad21e714ccf656021cc044dfa17847483edb..9a7fa6d465573bb2ea99089979154c86d5b17115 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -1,8 +1,9 @@ -from ...Methods.LateFusion import LateFusionClassifier +from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig import MonoviewClassifiers import numpy as np from sklearn.metrics import accuracy_score from utils.Dataset import getV +import pkgutil def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -15,8 +16,21 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory): - monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] +def getArgs(args, views, viewsIndices, directory, resultsMonoview): + if args.FU_L_cl_names!=['']: + pass + else: + monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) + if (not isPackage)] + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) + for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_config != ['']: + classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)] + else: + classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -24,14 +38,10 @@ def getArgs(args, views, viewsIndices, directory): "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", - "fusionMethod": "BayesianInference", + "fusionMethod": "MajorityVoting", "classifiersNames": args.FU_L_cl_names, - "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(";")]) - for monoviewClassifierModule,classifierConfig - in zip(args.FU_L_cl_config,monoviewClassifierModules)], - 'fusionMethodConfig': args.FU_L_method_config[0], + "classifiersConfigs": classifiersConfigs, + 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, "nbView": (len(viewsIndices))}} return [arguments] @@ -61,7 +71,7 @@ class MajorityVoting(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -76,31 +86,29 @@ class MajorityVoting(LateFusionClassifier): self.weights = self.weights/float(max(self.weights)) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - datasetLength = len(usedIndices) - votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) - monoViewDecisions = np.zeros((len(usedIndices),nbView), dtype=int) - for index, viewIndex in enumerate(viewsIndices): - monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) - for exampleIndex in range(datasetLength): - for viewIndex, featureClassification in enumerate(monoViewDecisions[exampleIndex, :]): - votes[exampleIndex, featureClassification] += self.weights[viewIndex] - nbMaximum = len(np.where(votes[exampleIndex] == max(votes[exampleIndex]))[0]) - try: - assert nbMaximum != nbView - except: - print "Majority voting can't decide, each classifier has voted for a different class" - raise - predictedLabels = np.argmax(votes, axis=1) - # Can be upgraded by restarting a new classification process if - # there are multiple maximums ?: - # while nbMaximum>1: - # relearn with only the classes that have a maximum number of vote - # votes = revote - # nbMaximum = len(np.where(votes==max(votes))[0]) - else: - predictedLabels = [] + + datasetLength = len(usedIndices) + votes = np.zeros((datasetLength, DATASET.get("Metadata").attrs["nbClass"]), dtype=int) + monoViewDecisions = np.zeros((len(usedIndices),nbView), dtype=int) + for index, viewIndex in enumerate(viewsIndices): + monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( + getV(DATASET, viewIndex, usedIndices)) + for exampleIndex in range(datasetLength): + for viewIndex, featureClassification in enumerate(monoViewDecisions[exampleIndex, :]): + votes[exampleIndex, featureClassification] += self.weights[viewIndex] + nbMaximum = len(np.where(votes[exampleIndex] == max(votes[exampleIndex]))[0]) + try: + assert nbMaximum != nbView + except: + print "Majority voting can't decide, each classifier has voted for a different class" + raise + predictedLabels = np.argmax(votes, axis=1) + # Can be upgraded by restarting a new classification process if + # there are multiple maximums ?: + # while nbMaximum>1: + # relearn with only the classes that have a maximum number of vote + # votes = revote + # nbMaximum = len(np.where(votes==max(votes))[0]) return predictedLabels def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index f389f11ad6cec3610bdc477c0c5ec7f3ec51012c..576a1550409efb5216726fd8de68637415f5ec51 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -1,4 +1,4 @@ -from ...Methods.LateFusion import LateFusionClassifier +from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig import MonoviewClassifiers import numpy as np import pyscm @@ -12,6 +12,7 @@ from math import ceil import random from sklearn.metrics import accuracy_score import itertools +import pkgutil def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -19,15 +20,28 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): paramsSets = [] for _ in range(nIter): max_attributes = randomState.randint(1, 20) - p = randomState.random() + p = randomState.random_sample() model = randomState.choice(["conjunction", "disjunction"]) order = randomState.randint(1,nbView) paramsSets.append([p, max_attributes, model, order]) return paramsSets -def getArgs(args, views, viewsIndices, directory): - monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] +def getArgs(args, views, viewsIndices, directory, resultsMonoview): + if args.FU_L_cl_names!=['']: + pass + else: + monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) + if (not isPackage)] + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) + for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_config != ['']: + classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)] + else: + classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -35,14 +49,10 @@ def getArgs(args, views, viewsIndices, directory): "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", - "fusionMethod": "BayesianInference", + "fusionMethod": "SCMForLinear", "classifiersNames": args.FU_L_cl_names, - "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(";")]) - for monoviewClassifierModule,classifierConfig - in zip(args.FU_L_cl_config,monoviewClassifierModules)], - 'fusionMethodConfig': args.FU_L_method_config[0], + "classifiersConfigs": classifiersConfigs, + 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, "nbView": (len(viewsIndices))}} return [arguments] @@ -77,15 +87,15 @@ class SCMForLinear(LateFusionClassifier): NB_CORES=NB_CORES) self.SCMClassifier = None # self.config = kwargs['fusionMethodConfig'][0] - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: self.p = 1 self.maxAttributes = 5 self.order = 1 self.modelType = "conjunction" else: - self.p = kwargs['fusionMethodConfig'][0] - self.maxAttributes = kwargs['fusionMethodConfig'][1] - self.order = kwargs['fusionMethodConfig'][2] + self.p = int(kwargs['fusionMethodConfig'][0]) + self.maxAttributes = int(kwargs['fusionMethodConfig'][1]) + self.order = int(kwargs['fusionMethodConfig'][2]) self.modelType = kwargs['fusionMethodConfig'][3] def setParams(self, paramsSet): @@ -99,14 +109,17 @@ class SCMForLinear(LateFusionClassifier): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices == None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - for index, viewIndex in enumerate(viewsIndices): - monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) - self.monoviewClassifiers.append( - monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels")[trainIndices], - NB_CORES=self.nbCores, - **dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfigs[index])))) + if type(self.monoviewClassifiersConfigs[0])==dict: + for index, viewIndex in enumerate(viewsIndices): + monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) + self.monoviewClassifiers.append( + monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels")[trainIndices], + NB_CORES=self.nbCores, + **dict((str(configIndex), config) for configIndex, config in + enumerate(self.monoviewClassifiersConfigs[index])))) + else: + self.monoviewClassifiers = self.monoviewClassifiersConfigs self.SCMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None): @@ -115,18 +128,15 @@ class SCMForLinear(LateFusionClassifier): nbView = len(viewsIndices) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) - accus=[] - for index, viewIndex in enumerate(viewsIndices): - monoviewDecision = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) - accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) - monoviewDecisions[:, index] = monoviewDecision - features = self.generateInteractions(monoviewDecisions) - predictedLabels = self.SCMClassifier.predict(features) - else: - predictedLabels = [] + monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) + accus=[] + for index, viewIndex in enumerate(viewsIndices): + monoviewDecision = self.monoviewClassifiers[index].predict( + getV(DATASET, viewIndex, usedIndices)) + accus.append(accuracy_score(DATASET.get("Labels").value[usedIndices], monoviewDecision)) + monoviewDecisions[:, index] = monoviewDecision + features = self.generateInteractions(monoviewDecisions) + predictedLabels = self.SCMClassifier.predict(features) return predictedLabels def SCMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): @@ -175,7 +185,7 @@ class SCMForLinear(LateFusionClassifier): dsetFile = h5py.File(name, "r") packedDataset = dsetFile.get("temp_scm") attributeClassification = BaptisteRuleClassifications(packedDataset, features.shape[0]) - self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels")[usedIndices], attribute_classifications=attributeClassification) + self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels").value[usedIndices], attribute_classifications=attributeClassification) try: dsetFile.close() os.remove(name) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py index 3d351bb2c17ae9aa2a900f30e720cc6b6a2bbb06..5acaab03ac42543d66116992519d93b94919295f 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py @@ -1,9 +1,10 @@ -from ...Methods.LateFusion import LateFusionClassifier +from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig import MonoviewClassifiers import numpy as np from sklearn.multiclass import OneVsOneClassifier from sklearn.svm import SVC from utils.Dataset import getV +import pkgutil def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -16,8 +17,21 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # return None -def getArgs(args, views, viewsIndices, directory): - monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] +def getArgs(args, views, viewsIndices, directory, resultsMonoview): + if args.FU_L_cl_names!=['']: + pass + else: + monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) + if (not isPackage)] + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) + for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_config != ['']: + classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)] + else: + classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -25,14 +39,10 @@ def getArgs(args, views, viewsIndices, directory): "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", - "fusionMethod": "BayesianInference", + "fusionMethod": "SVMForLinear", "classifiersNames": args.FU_L_cl_names, - "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(";")]) - for monoviewClassifierModule,classifierConfig - in zip(args.FU_L_cl_config,monoviewClassifierModules)], - 'fusionMethodConfig': args.FU_L_method_config[0], + "classifiersConfigs": classifiersConfigs, + 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, "nbView": (len(viewsIndices))}} return [arguments] @@ -48,14 +58,17 @@ class SVMForLinear(LateFusionClassifier): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices == None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - for index, viewIndex in enumerate(viewsIndices): - monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) - self.monoviewClassifiers.append( - monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), - DATASET.get("Labels")[trainIndices], - NB_CORES=self.nbCores, - **dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfigs[index])))) + if type(self.monoviewClassifiersConfigs[0])==dict: + for index, viewIndex in enumerate(viewsIndices): + monoviewClassifier = getattr(MonoviewClassifiers, self.monoviewClassifiersNames[index]) + self.monoviewClassifiers.append( + monoviewClassifier.fit(getV(DATASET, viewIndex, trainIndices), + DATASET.get("Labels")[trainIndices], + NB_CORES=self.nbCores, + **dict((str(configIndex), config) for configIndex, config in + enumerate(self.monoviewClassifiersConfigs[index])))) + else: + self.monoviewClassifiers = self.monoviewClassifiersConfigs self.SVMForLinearFusionFit(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) def setParams(self, paramsSet): @@ -67,14 +80,11 @@ class SVMForLinear(LateFusionClassifier): nbView = len(viewsIndices) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) - for index, viewIndex in enumerate(viewsIndices): - monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( - getV(DATASET, viewIndex, usedIndices)) - predictedLabels = self.SVMClassifier.predict(monoviewDecisions) - else: - predictedLabels = [] + monoviewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) + for index, viewIndex in enumerate(viewsIndices): + monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( + getV(DATASET, viewIndex, usedIndices)) + predictedLabels = self.SVMClassifier.predict(monoviewDecisions) return predictedLabels def SVMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): @@ -87,7 +97,7 @@ class SVMForLinear(LateFusionClassifier): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) - self.SVMClassifier.fit(monoViewDecisions, DATASET.get("Labels")[usedIndices]) + self.SVMClassifier.fit(monoViewDecisions, DATASET.get("Labels").value[usedIndices]) def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): configString = "with SVM for linear \n\t-With monoview classifiers : " diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py index 34f2eb9fc879e9ca487307756eb5dff696c82eb1..c63313be2b90aec114cac09a99bbf2066eaaab63 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py @@ -1,8 +1,9 @@ -from ...Methods.LateFusion import LateFusionClassifier +from ..LateFusion import LateFusionClassifier, getClassifiers, getConfig import MonoviewClassifiers import numpy as np from sklearn.metrics import accuracy_score from utils.Dataset import getV +import pkgutil def genParamsSets(classificationKWARGS, randomState, nIter=1): @@ -15,8 +16,21 @@ def genParamsSets(classificationKWARGS, randomState, nIter=1): return paramsSets -def getArgs(args, views, viewsIndices, directory): - monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] +def getArgs(args, views, viewsIndices, directory, resultsMonoview): + if args.FU_L_cl_names!=['']: + pass + else: + monoviewClassifierModulesNames = [name for _, name, isPackage in pkgutil.iter_modules(['MonoviewClassifiers']) + if (not isPackage)] + args.FU_L_cl_names = getClassifiers(args.FU_L_select_monoview, monoviewClassifierModulesNames, directory, viewsIndices) + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) + for classifierName in args.FU_L_cl_names] + if args.FU_L_cl_config != ['']: + classifiersConfigs = [monoviewClassifierModule.getKWARGS([arg.split(":") for arg in classifierConfig.split(",")]) + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)] + else: + classifiersConfigs = getConfig(args.FU_L_cl_names, resultsMonoview) arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -24,14 +38,10 @@ def getArgs(args, views, viewsIndices, directory): "NB_CLASS": len(args.CL_classes), "LABELS_NAMES": args.CL_classes, "FusionKWARGS": {"fusionType": "LateFusion", - "fusionMethod": "BayesianInference", + "fusionMethod": "WeightedLinear", "classifiersNames": args.FU_L_cl_names, - "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") - for arg in - classifierConfig.split(";")]) - for monoviewClassifierModule,classifierConfig - in zip(args.FU_L_cl_config,monoviewClassifierModules)], - 'fusionMethodConfig': args.FU_L_method_config[0], + "classifiersConfigs": classifiersConfigs, + 'fusionMethodConfig': args.FU_L_method_config, 'monoviewSelection': args.FU_L_select_monoview, "nbView": (len(viewsIndices))}} return [arguments] @@ -41,7 +51,7 @@ class WeightedLinear(LateFusionClassifier): def __init__(self, randomState, NB_CORES=1, **kwargs): LateFusionClassifier.__init__(self, randomState, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) @@ -57,14 +67,11 @@ class WeightedLinear(LateFusionClassifier): self.weights = self.weights/float(sum(self.weights)) if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: - viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) - for index, viewIndex in enumerate(viewsIndices): - viewScores[index] = np.array(self.monoviewClassifiers[index].predict_proba( - getV(DATASET, viewIndex, usedIndices)))*self.weights[index] - predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) - else: - predictedLabels = [] + viewScores = np.zeros((nbView, len(usedIndices), DATASET.get("Metadata").attrs["nbClass"])) + for index, viewIndex in enumerate(viewsIndices): + viewScores[index] = np.array(self.monoviewClassifiers[index].predict_proba( + getV(DATASET, viewIndex, usedIndices)))*self.weights[index] + predictedLabels = np.argmax(np.sum(viewScores, axis=0), axis=1) return predictedLabels diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py index 6906723fb15730b650330196f8649ece801f6a82..d378aa0e4421f26de12ef770852772431ad0d2ef 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/analyzeResults.py @@ -31,13 +31,13 @@ def printMetricScore(metricScores, metrics): else: metricKWARGS = {} metricScoreString += "\tFor "+metricModule.getConfig(**metricKWARGS)+" : " - metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0]) +" with STD : "+str(metricScores[metric[0]][2]) - metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1]) +" with STD : "+str(metricScores[metric[0]][3]) + metricScoreString += "\n\t\t- Score on train : "+str(metricScores[metric[0]][0]) + metricScoreString += "\n\t\t- Score on test : "+str(metricScores[metric[0]][1]) metricScoreString += "\n\n" return metricScoreString -def getTotalMetricScores(metric, trainLabelsIterations, testLabelsIterations, DATASET, iterationValidationIndices, statsIter): +def getTotalMetricScores(metric, trainLabels, testLabels, DATASET, validationIndices): labels = DATASET.get("Labels").value DATASET_LENGTH = DATASET.get("Metadata").attrs["datasetLength"] metricModule = getattr(Metrics, metric[0]) @@ -45,30 +45,29 @@ def getTotalMetricScores(metric, trainLabelsIterations, testLabelsIterations, DA metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metric[1])) else: metricKWARGS = {} - trainScores = [] - testScores = [] - validationScores = [] - for statsIterIndex in range(statsIter): - validationIndices = iterationValidationIndices[statsIterIndex] - learningIndices = [index for index in range(DATASET_LENGTH) if index not in validationIndices] - trainScores.append(metricModule.score(labels[learningIndices], trainLabelsIterations[statsIterIndex], **metricKWARGS)) - testScores.append(metricModule.score(labels[validationIndices], testLabelsIterations[statsIterIndex], **metricKWARGS)) - return [np.mean(np.array(trainScores)), np.mean(np.array(testScores)), np.std(np.array(trainScores)), - np.std(np.array(testScores))] - - -def getMetricsScores(metrics, trainLabelsIterations, testLabelsIterations, - DATASET, validationIndices, statsIter): + + learningIndices = [index for index in range(DATASET_LENGTH) if index not in validationIndices] + trainScore = metricModule.score(labels[learningIndices], trainLabels, **metricKWARGS) + testScore = metricModule.score(labels[validationIndices], testLabels, **metricKWARGS) + return [trainScore, testScore] + + +def getMetricsScores(metrics, trainLabels, testLabels, + DATASET, validationIndices): metricsScores = {} for metric in metrics: - metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabelsIterations, testLabelsIterations, - DATASET, validationIndices, statsIter) + metricsScores[metric[0]] = getTotalMetricScores(metric, trainLabels, testLabels, + DATASET, validationIndices) return metricsScores -def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, DATASET, classificationKWARGS, - learningRate, LABELS_DICTIONARY,views, nbCores, times, name, nbFolds, ivalidationIndices, - gridSearch, nIter, metrics, statsIter, viewsIndices, randomState): +def execute(classifier, trainLabels, + testLabels, DATASET, + classificationKWARGS, classificationIndices, + LABELS_DICTIONARY, views, nbCores, times, + name, KFolds, + hyperParamSearch, nIter, metrics, statsIter, + viewsIndices, randomState): CLASS_LABELS = DATASET.get("Labels").value @@ -103,25 +102,25 @@ def execute(classifiersIterations, trainLabelsIterations,testLabelsIterations, D # # kFoldLearningTime = [np.mean([kFoldLearningTime[statsIterIndex][foldIdx] for foldIdx in range(nbFolds)])for statsIterIndex in range(statsIter)] # kFoldPredictionTime = [np.mean([kFoldPredictionTime[statsIterIndex][foldIdx] for foldIdx in range(nbFolds)])for statsIterIndex in range(statsIter)] - learningIndices = [[index for index in range(DATASET_LENGTH) if index not in ivalidationIndices[statsIterIndex]] for statsIterIndex in range(statsIter)] + learningIndices, validationIndices = classificationIndices metricModule = getattr(Metrics, metrics[0][0]) if metrics[0][1]!=None: metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1])) else: metricKWARGS = {} - scoreOnTrain = np.mean(np.array([metricModule.score(CLASS_LABELS[learningIndices[statsIterIndex]], trainLabelsIterations[statsIterIndex], **metricKWARGS) for statsIterIndex in range(statsIter)])) - scoreOnTest = np.mean(np.array([metricModule.score(CLASS_LABELS[ivalidationIndices[statsIterIndex]], testLabelsIterations[statsIterIndex], **metricKWARGS) for statsIterIndex in range(statsIter)])) - fusionConfiguration = classifiersIterations[0].classifier.getConfig(fusionMethodConfig,monoviewClassifiersNames, monoviewClassifiersConfigs) - stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + "and random state : "+str(randomState)+\ - "\n\nAverage "+metrics[0][0]+" :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str(scoreOnTest) + \ + scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], trainLabels, **metricKWARGS) + scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS) + fusionConfiguration = classifier.classifier.getConfig(fusionMethodConfig,monoviewClassifiersNames, monoviewClassifiersConfigs) + stringAnalysis = "\t\tResult for Multiview classification with "+ fusionType + " and random state : "+str(randomState)+\ + "\n\n"+metrics[0][0]+" :\n\t-On Train : " + str(scoreOnTrain) + "\n\t-On Test : " + str(scoreOnTest) + \ "\n\nDataset info :\n\t-Database name : " + name + "\n\t-Labels : " + \ - ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(nbFolds) + \ + ', '.join(LABELS_DICTIONARY.values()) + "\n\t-Views : " + ', '.join(views) + "\n\t-" + str(KFolds.n_splits) + \ " folds\n\nClassification configuration : \n\t-Algorithm used : "+fusionType+" "+fusionConfiguration if fusionType=="LateFusion": - stringAnalysis+=Methods.LateFusion.getAccuracies(classifiersIterations) - metricsScores = getMetricsScores(metrics, trainLabelsIterations, testLabelsIterations, - DATASET, ivalidationIndices, statsIter) + stringAnalysis+=Methods.LateFusion.getScores(classifier) + metricsScores = getMetricsScores(metrics, trainLabels, testLabels, + DATASET, validationIndices) stringAnalysis+=printMetricScore(metricsScores, metrics) # stringAnalysis += "\n\nComputation time on " + str(nbCores) + " cores : \n\tDatabase extraction time : " + str( # hms(seconds=int(extractionTime))) + "\n\t" diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py index a2aacd89feb124fa6062db888f55ed0ab436000c..cd04dde64c34294c7a4272cbf01011e896b0c60a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Classifiers/DecisionTree.py @@ -8,11 +8,11 @@ import logging import Metrics -def DecisionTree(data, labels, arg, weights): +def DecisionTree(data, labels, arg, weights, randomState): depth = int(arg[0]) subSampling = float(arg[1]) if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, weights=weights) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState, weights=weights) else: subSampledData, subSampledLabels, subSampledWeights = data, labels, weights isBad = False @@ -33,7 +33,7 @@ def getConfig(classifierConfig): return 'with depth ' + str(depth) + ', ' + ' sub-sampled at ' + str(subSampling) + ' ' -def gridSearch(data, labels, randomState, metric="accuracy_score"): +def hyperParamSearch(data, labels, randomState, metric="accuracy_score"): minSubSampling = 1.0/(len(labels)/2) bestSettings = [] bestResults = [] @@ -63,7 +63,7 @@ def gridSearch(data, labels, randomState, metric="accuracy_score"): accuracies = np.zeros(50) for i in range(50): if subSampling != 1.0: - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState) else: subSampledData, subSampledLabels, = data, labels classifier = tree.DecisionTreeClassifier(max_depth=max_depth) @@ -78,7 +78,7 @@ def gridSearch(data, labels, randomState, metric="accuracy_score"): preliminary_accuracies = np.zeros(50) if minSubSampling < 0.01: for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, 0.01, randomState) classifier.fit(subSampledData, subSampledLabels) prediction = classifier.predict(data) preliminary_accuracies[i] = accuracy_score(labels, prediction) @@ -88,7 +88,7 @@ def gridSearch(data, labels, randomState, metric="accuracy_score"): if minSubSampling < subSampling: accuracies = np.zeros(50) for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState ) classifier = tree.DecisionTreeClassifier(max_depth=1) classifier.fit(subSampledData, subSampledLabels) prediction = classifier.predict(data) @@ -101,7 +101,7 @@ def gridSearch(data, labels, randomState, metric="accuracy_score"): for subSampling in sorted((np.arange(19, dtype=float)+1)/2000, reverse=True): accuracies = np.zeros(50) for i in range(50): - subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling) + subSampledData, subSampledLabels, subSampledWeights = subSample(data, labels, subSampling, randomState) if minSubSampling < subSampling: classifier1 = tree.DecisionTreeClassifier(max_depth=1) classifier1.fit(subSampledData, subSampledLabels) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py index da7ddd9d2bcf5cc94591ef0516f6457463102563..5cd16f82c3aa65fb88ed2baa87ee903a42872ba7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py @@ -21,17 +21,14 @@ def getBenchmark(benchmark, args=None): allAlgos = [name for _, name, isPackage in pkgutil.iter_modules(['Multiview/Mumbo/Classifiers']) if not isPackage and not name in ["SubSampling", "ModifiedMulticlass", "Kover"]] - if args is None: + if args is None or args.MU_types != ['']: benchmark["Multiview"]["Mumbo"] = allAlgos else: - if args.MU_types != ['']: - benchmark["Multiview"]["Mumbo"] = args.MU_types - else : - benchmark["Multiview"]["Mumbo"] = allAlgos + benchmark["Multiview"]["Mumbo"] = args.MU_types return benchmark -def getArgs(args, benchmark, views, viewsIndices, randomState, directory): +def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview): argumentsList = [] arguments = {"CL_type": "Mumbo", @@ -72,11 +69,11 @@ def trainWeakClassifier(classifierName, monoviewDataset, CLASS_LABELS, def trainWeakClassifier_hdf5(classifierName, monoviewDataset, CLASS_LABELS, DATASET_LENGTH, - viewIndice, classifier_config, viewName, iterIndex, costMatrices, classifierIndex): + viewIndice, classifier_config, viewName, iterIndex, costMatrices, classifierIndex, randomState): weights = computeWeights(DATASET_LENGTH, iterIndex, classifierIndex, CLASS_LABELS, costMatrices) classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string classifierMethod = getattr(classifierModule, classifierName) - classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights) + classifier, classes, isBad, averageAccuracy = classifierMethod(monoviewDataset, CLASS_LABELS, classifier_config, weights, randomState) logging.debug("\t\t\tView " + str(viewIndice) + " : " + str(averageAccuracy)) return classifier, classes, isBad, averageAccuracy @@ -89,11 +86,14 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices, classifierModule = globals()[classifierName] # Permet d'appeler une fonction avec une string classifierGridSearch = getattr(classifierModule, "hyperParamSearch") bestSettings.append(classifierGridSearch(getV(DATASET, viewIndices[classifierIndex], learningIndices), - DATASET.get("Labels")[learningIndices], metric=metric)) + DATASET.get("Labels").value[learningIndices], randomState, metric=metric)) logging.debug("\tDone:\t Gridsearch for "+classifierName) return bestSettings, None +def getCLString(classificationKWARGS): + return "Mumbo-"+"-".join(classificationKWARGS["classifiersNames"]) + class Mumbo: def __init__(self, randomState, NB_CORES=1, **kwargs): @@ -149,14 +149,14 @@ class Mumbo: def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): # Initialization - if not trainIndices: + if trainIndices is None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(viewsIndices)==type(None): viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] NB_VIEW = len(viewsIndices) DATASET_LENGTH = len(trainIndices) - LABELS = DATASET["Labels"][trainIndices] + LABELS = DATASET.get("Labels").value[trainIndices] self.initDataDependant(DATASET_LENGTH, NB_VIEW, NB_CLASS, LABELS) # Learning isStabilized=False @@ -188,7 +188,7 @@ class Mumbo: self.updateCostmatrices(NB_VIEW, DATASET_LENGTH, NB_CLASS, LABELS) bestView, edge, bestFakeView = self.chooseView(viewsIndices, LABELS, DATASET_LENGTH) self.bestViews[self.iterIndex] = bestView - logging.debug("\t\t\t Best view : \t\t"+DATASET["View"+str(bestView)].attrs["name"]) + logging.debug("\t\t\t Best view : \t\t"+DATASET.get("View"+str(bestView)).attrs["name"]) if areBad.all(): self.generalAlphas[self.iterIndex] = 0. else: @@ -197,7 +197,7 @@ class Mumbo: self.updateGeneralFs(DATASET_LENGTH, NB_CLASS, bestFakeView) self.updateGeneralCostMatrix(DATASET_LENGTH, NB_CLASS,LABELS) predictedLabels = self.predict_hdf5(DATASET, usedIndices=trainIndices, viewsIndices=viewsIndices) - accuracy = accuracy_score(DATASET.get("Labels")[trainIndices], predictedLabels) + accuracy = accuracy_score(DATASET.get("Labels").value[trainIndices], predictedLabels) self.iterAccuracies[self.iterIndex] = accuracy self.iterIndex += 1 @@ -210,7 +210,7 @@ class Mumbo: viewsIndices = range(DATASET.get("Metadata").attrs["nbView"]) viewDict = dict((viewIndex, index) for index, viewIndex in enumerate(viewsIndices)) - if usedIndices: + if usedIndices is not None: DATASET_LENGTH = len(usedIndices) predictedLabels = np.zeros(DATASET_LENGTH) @@ -218,7 +218,7 @@ class Mumbo: votes = np.zeros(NB_CLASS) for classifier, alpha, view in zip(self.bestClassifiers, self.alphas, self.bestViews): if view != -1: - data = getV(DATASET, int(view), exampleIndex) + data = getV(DATASET, int(view), int(exampleIndex)) votes[int(classifier.predict(np.array([data])))] += alpha[viewDict[view]] else: pass @@ -231,7 +231,7 @@ class Mumbo: NB_CLASS = DATASET.get("Metadata").attrs["nbClass"] if usedIndices == None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if usedIndices: + if usedIndices is not None: DATASET_LENGTH = len(usedIndices) predictedProbas = np.zeros((DATASET_LENGTH, NB_CLASS)) @@ -286,10 +286,10 @@ class Mumbo: trainedClassifiersAndLabels = Parallel(n_jobs=NB_JOBS)( delayed(trainWeakClassifier_hdf5)(classifiersNames[classifierIndex], getV(DATASET,viewIndex,trainIndices), - DATASET.get("Labels")[trainIndices], + DATASET.get("Labels").value[trainIndices], DATASET_LENGTH, viewIndex, classifiersConfigs[classifierIndex], - DATASET.get("View"+str(viewIndex)).attrs["name"], iterIndex, costMatrices, classifierIndex) + DATASET.get("View"+str(viewIndex)).attrs["name"], iterIndex, costMatrices, classifierIndex, self.randomState) for classifierIndex, viewIndex in enumerate(viewIndices)) for viewFakeIndex, (classifier, labelsArray, isBad, averageAccuracy) in enumerate(trainedClassifiersAndLabels): diff --git a/Code/MonoMutliViewClassifiers/utils/Dataset.py b/Code/MonoMutliViewClassifiers/utils/Dataset.py index 23dca4b27cf75e634eed2026b7605fa0c2e3d7ec..35df10ecbb04a662eb5409447b5144302d86efea 100644 --- a/Code/MonoMutliViewClassifiers/utils/Dataset.py +++ b/Code/MonoMutliViewClassifiers/utils/Dataset.py @@ -5,15 +5,22 @@ import numpy as np def getV(DATASET, viewIndex, usedIndices=None): if usedIndices==None: usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) - if not DATASET.get("View"+str(viewIndex)).attrs["sparse"]: + if type(usedIndices) is int: return DATASET.get("View"+str(viewIndex))[usedIndices, :] else: - sparse_mat = sparse.csr_matrix((DATASET.get("View"+str(viewIndex)).get("data").value, - DATASET.get("View"+str(viewIndex)).get("indices").value, - DATASET.get("View"+str(viewIndex)).get("indptr").value), - shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:] - - return sparse_mat + usedIndices=np.array(usedIndices) + sortedIndices = np.argsort(usedIndices) + usedIndices = usedIndices[sortedIndices] + + if not DATASET.get("View"+str(viewIndex)).attrs["sparse"]: + return DATASET.get("View"+str(viewIndex))[usedIndices, :][np.argsort(sortedIndices),:] + else: + sparse_mat = sparse.csr_matrix((DATASET.get("View"+str(viewIndex)).get("data").value, + DATASET.get("View"+str(viewIndex)).get("indices").value, + DATASET.get("View"+str(viewIndex)).get("indptr").value), + shape=DATASET.get("View"+str(viewIndex)).attrs["shape"])[usedIndices,:][np.argsort(sortedIndices),:] + + return sparse_mat def getShape(DATASET, viewIndex): diff --git a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py index 932aa04e7297a6fdc66a809fc4f9f20554d512ff..b9fbe250ad9cd418bf444ced5bee9d9b85ba0420 100644 --- a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py +++ b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py @@ -18,7 +18,7 @@ def gridSearch(dataset, classifierName, viewsIndices=None, kFolds=None, nIter=1, pass -def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds, randomState, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): +def randomizedSearch(dataset, classifierName, metrics, learningIndices, KFolds, randomState, viewsIndices=None, nIter=1, nbCores=1, **classificationKWARGS): if viewsIndices is None: viewsIndices = range(dataset.get("Metadata").attrs["nbView"]) metric = metrics[0] @@ -30,7 +30,7 @@ def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds classifierPackage =getattr(Multiview,classifierName) # Permet d'appeler un module avec une string classifierModule = getattr(classifierPackage, classifierName) classifierClass = getattr(classifierModule, classifierName) - statsIter = len(iLearningIndices) + statsIter = len(learningIndices) if classifierName != "Mumbo": datasetLength = dataset.get("Metadata").attrs["datasetLength"] paramsSets = classifierModule.genParamsSets(classificationKWARGS, randomState, nIter=nIter) @@ -41,20 +41,21 @@ def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds baseScore = 1000.0 isBetter = "lower" bestSettings = None + kFolds = KFolds.split(learningIndices, dataset.get("Labels").value[learningIndices]) for paramsSet in paramsSets: scores = [] - for statsIterIndex in range(statsIter): - for fold in iKFolds[statsIterIndex]: - fold.sort() - trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index in iLearningIndices[statsIterIndex])] - classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) - classifier.setParams(paramsSet) - classifier.fit_hdf5(dataset, trainIndices=trainIndices, viewsIndices=viewsIndices) - # trainLabels = classifier.predict_hdf5(dataset, usedIndices=trainIndices, viewsIndices=viewsIndices) - testLabels = classifier.predict_hdf5(dataset, usedIndices=fold, viewsIndices=viewsIndices) - # trainScore = metricModule.score(dataset.get("Labels").value[trainIndices], trainLabels) - testScore = metricModule.score(dataset.get("Labels").value[fold], testLabels) - scores.append(testScore) + # for statsIterIndex in range(statsIter): + for trainIndices, testIndices in kFolds: + # fold.sort() + # trainIndices = [index for index in range(datasetLength) if (index not in fold) and (index in learningIndices[statsIterIndex])] + classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) + classifier.setParams(paramsSet) + classifier.fit_hdf5(dataset, trainIndices=learningIndices[trainIndices], viewsIndices=viewsIndices) + # trainLabels = classifier.predict_hdf5(dataset, usedIndices=trainIndices, viewsIndices=viewsIndices) + testLabels = classifier.predict_hdf5(dataset, usedIndices=learningIndices[testIndices], viewsIndices=viewsIndices) + # trainScore = metricModule.score(dataset.get("Labels").value[trainIndices], trainLabels) + testScore = metricModule.score(dataset.get("Labels").value[learningIndices[testIndices]], testLabels) + scores.append(testScore) crossValScore = np.mean(np.array(scores)) if isBetter=="higher" and crossValScore > baseScore: @@ -63,13 +64,13 @@ def randomizedSearch(dataset, classifierName, metrics, iLearningIndices, iKFolds elif isBetter=="lower" and crossValScore < baseScore: baseScore = crossValScore bestSettings = paramsSet - classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) classifier.setParams(bestSettings) else: - bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, iLearningIndices[0], randomState, metric=metric, nIter=nIter) + bestConfigs, _ = classifierModule.gridSearch_hdf5(dataset, viewsIndices, classificationKWARGS, learningIndices, randomState, metric=metric, nIter=nIter) classificationKWARGS["classifiersConfigs"] = bestConfigs - classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) + classifier = classifierClass(randomState, NB_CORES=nbCores, **classificationKWARGS) return classifier