From ce8910718cd5a17983d929eae202d0664cef1eec Mon Sep 17 00:00:00 2001 From: bbauvin <baptiste.bauvin@centrale-marseille.fr> Date: Wed, 4 Oct 2017 14:26:07 -0400 Subject: [PATCH] Did a lot of reworking on passing arguments --- Code/MonoMutliViewClassifiers/ExecClassif.py | 365 ++++++++++-------- .../Metrics/__init__.py | 26 +- .../Monoview/ExecClassifMonoView.py | 26 +- .../{ClassifMonoView.py => MonoviewUtils.py} | 0 .../Monoview/__init__.py | 2 +- .../MonoviewClassifiers/Adaboost.py | 11 + .../MonoviewClassifiers/DecisionTree.py | 8 + .../MonoviewClassifiers/KNN.py | 8 + .../MonoviewClassifiers/RandomForest.py | 10 + .../MonoviewClassifiers/SCM.py | 13 + .../MonoviewClassifiers/SGD.py | 12 + .../MonoviewClassifiers/SVMLinear.py | 8 + .../MonoviewClassifiers/SVMPoly.py | 10 + .../MonoviewClassifiers/SVMRBF.py | 9 + .../MonoviewClassifiers/__init__.py | 18 +- .../Multiview/ExecMultiview.py | 19 +- .../Multiview/Fusion/Fusion.py | 15 +- .../Multiview/Fusion/Methods/EarlyFusion.py | 8 +- .../EarlyFusionPackage/WeightedLinear.py | 29 +- .../Multiview/Fusion/Methods/LateFusion.py | 11 +- .../LateFusionPackage/BayesianInference.py | 38 +- .../LateFusionPackage/MajorityVoting.py | 28 +- .../Methods/LateFusionPackage/SCMForLinear.py | 55 ++- .../Methods/LateFusionPackage/SVMForLinear.py | 22 +- .../LateFusionPackage/WeightedLinear.py | 27 +- .../Multiview/Mumbo/Mumbo.py | 22 +- .../ResultAnalysis.py | 6 +- .../utils/HyperParameterSearch.py | 2 +- 28 files changed, 583 insertions(+), 225 deletions(-) rename Code/MonoMutliViewClassifiers/Monoview/{ClassifMonoView.py => MonoviewUtils.py} (100%) diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 23622b86..aefc98d2 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -18,6 +18,7 @@ import matplotlib # Import own modules import Multiview import Metrics +import MonoviewClassifiers from Multiview.ExecMultiview import ExecMultiview, ExecMultiview_multicore from Monoview.ExecClassifMonoView import ExecMonoview, ExecMonoview_multicore import Multiview.GetMultiviewDb as DB @@ -158,7 +159,7 @@ def initBenchmark(args): else: algosMutliview = args.CL_algos_multiview if "Mumbo" in algosMutliview: - benchmark["Multiview"]["Mumbo"] = args.MU_types.split(":") + benchmark["Multiview"]["Mumbo"] = args.MU_types if "Fusion" in algosMutliview: benchmark["Multiview"]["Fusion"] = {} benchmark["Multiview"]["Fusion"]["Methods"] = dict( @@ -219,22 +220,35 @@ def initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATA return argumentDictionaries -def initKWARGS(args): - kwargsInit = { - "RandomForestKWARGSInit": {"0": map(int, args.CL_RF_trees.split())[0], - "1": map(int, args.CL_RF_max_depth.split(":"))[0]}, - "SVMLinearKWARGSInit": {"0": map(int, args.CL_SVML_C.split(":"))[0]}, - "SVMRBFKWARGSInit": {"0": map(int, args.CL_SVMR_C.split(":"))[0]}, - "SVMPolyKWARGSInit": {"0": map(int, args.CL_SVMP_C.split(":"))[0], - '1': map(int, args.CL_SVMP_deg.split(":"))[0]}, - "DecisionTreeKWARGSInit": {"0": map(int, args.CL_DT_depth.split(":"))[0]}, - "SGDKWARGSInit": {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_penalty.split(":")[0], - "0": args.CL_SGD_loss.split(":")[0]}, - "KNNKWARGSInit": {"0": map(float, args.CL_KNN_neigh.split(":"))[0]}, - "AdaboostKWARGSInit": {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]}, - "SCMKWARGSInit": {"0": args.CL_SCM_max_rules.split(":")[0]}, - } - return 
kwargsInit +def initMonoviewKWARGS(args, classifiersNames): + monoviewKWARGS = {} + for classifiersName in classifiersNames: + classifierModule = getattr(MonoviewClassifiers, classifiersName) + monoviewKWARGS[classifiersName+"KWARGSInit"] = classifierModule.getKWARGS([(key, value) for key, value in vars(args).iteritems() if key.startswith("CL_"+classifiersName)]) + return monoviewKWARGS + + +def initKWARGS(args, benchmark): + if "Monoview" in benchmark: + monoviewKWARGS = initMonoviewKWARGS(args, benchmark["Monoview"]) + + + + # kwargsInit = { + # "RandomForestKWARGSInit": {"0": map(int, args.CL_RF_trees.split())[0], + # "1": map(int, args.CL_RF_max_depth.split(":"))[0]}, + # "SVMLinearKWARGSInit": {"0": map(int, args.CL_SVML_C.split(":"))[0]}, + # "SVMRBFKWARGSInit": {"0": map(int, args.CL_SVMR_C.split(":"))[0]}, + # "SVMPolyKWARGSInit": {"0": map(int, args.CL_SVMP_C.split(":"))[0], + # '1': map(int, args.CL_SVMP_deg.split(":"))[0]}, + # "DecisionTreeKWARGSInit": {"0": map(int, args.CL_DT_depth.split(":"))[0]}, + # "SGDKWARGSInit": {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_penalty.split(":")[0], + # "0": args.CL_SGD_loss.split(":")[0]}, + # "KNNKWARGSInit": {"0": map(float, args.CL_KNN_neigh.split(":"))[0]}, + # "AdaboostKWARGSInit": {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]}, + # "SCMKWARGSInit": {"0": args.CL_SCM_max_rules.split(":")[0]}, + # } + return monoviewKWARGS def lateFusionSetArgs(views, viewsIndices, classes, method, @@ -254,101 +268,111 @@ def lateFusionSetArgs(views, viewsIndices, classes, method, def initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, classifiersNames, - fusionMethodConfig, NB_VIEW, metrics): - metricModule = getattr(Metrics, metrics[0]) - if benchmark["Multiview"]: - if "Fusion" in benchmark["Multiview"]: - if args.FU_cl_names != ['']: - print "Formage" - exit() - monoClassifiers = args.FU_cl_names - monoClassifiersConfigs = [globals()[classifier + "KWARGS"] for classifier in monoClassifiers] - if args.FU_method_config != [""]: - fusionMethodConfigs = [map(float, config.split(":")) for config in args.FU_method_config] - elif not gridSearch: - raise ValueError("No config for fusion method given and no gridearch wanted") - else: - try: - fusionMethodConfigs = [["config"] for method in - benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]] - except: - pass - try: - for methodIndex, method in enumerate(benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]): - if args.FU_fixed: - arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, - args.FU_cl_names, monoClassifiersConfigs, - fusionMethodConfigs[methodIndex]) - argumentDictionaries["Multiview"].append(arguments) - else: - for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), - NB_VIEW): - monoClassifiersNamesComb = [monoClassifiers[index] for index in combination] - monoClassifiersConfigsComb = [monoClassifiersConfigs[index] for index in - combination] - arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, - monoClassifiersNamesComb, monoClassifiersConfigsComb, - fusionMethodConfigs[methodIndex]) - argumentDictionaries["Multiview"].append(arguments) - except: - pass - else: - if "LateFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \ - "Classifiers" in benchmark["Multiview"]["Fusion"]: - bestClassifiers = [] - bestClassifiersConfigs = [] - if argumentDictionaries["Monoview"] != {}: - for viewIndex, view in 
enumerate(views): - if metricModule.getConfig()[-14] == "h": - bestClassifiers.append( - classifiersNames[viewIndex][np.argmax(np.array(scores[viewIndex]))]) - bestClassifiersConfigs.append( - classifiersConfigs[viewIndex][np.argmax(np.array(scores[viewIndex]))]) - else: - bestClassifiers.append( - classifiersNames[viewIndex][np.argmin(np.array(scores[viewIndex]))]) - bestClassifiersConfigs.append( - classifiersConfigs[viewIndex][np.argmin(np.array(scores[viewIndex]))]) - else: - raise AttributeError("No Monoview classifiers asked in args and no monoview benchmark done.") - for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]: - arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, - bestClassifiers, bestClassifiersConfigs, - fusionMethodConfig) - argumentDictionaries["Multiview"].append(arguments) - if "EarlyFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \ - "Classifiers" in benchmark["Multiview"]["Fusion"]: - for method in benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"]: - for classifier in benchmark["Multiview"]["Fusion"]["Classifiers"]: - arguments = {"CL_type": "Fusion", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": method, - "classifiersNames": [classifier], - "classifiersConfigs": [ - initKWARGS[classifier + "KWARGSInit"]], - 'fusionMethodConfig': fusionMethodConfig, - "nbView": (len(viewsIndices))}} - argumentDictionaries["Multiview"].append(arguments) - if "Mumbo" in benchmark["Multiview"]: - for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), - NB_VIEW): - mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] - arguments = {"CL_type": "Mumbo", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes), - "LABELS_NAMES": args.CL_classes, - "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, - "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]), - "threshold": args.MU_iter[2], - "classifiersConfigs": [argument.split(":") for argument in - args.MU_config], "nbView": (len(viewsIndices))}} - argumentDictionaries["Multiview"].append(arguments) + NB_VIEW, metrics, argumentDictionaries): + # metricModule = getattr(Metrics, metrics[0]) + multiviewArguments = [] + if "Multiview" in benchmark: + for multiviewAlgoName in benchmark["Multiview"]: + multiviewPackage = getattr(Multiview, multiviewAlgoName) + mutliviewModule = getattr(multiviewPackage, multiviewAlgoName) + multiviewArguments+= mutliviewModule.getArgs(args, benchmark, views, viewsIndices) + # if benchmark["Multiview"]: + # for multiviewAlgoName in benchmark["Multiview"]: + # multiviewPackage = getattr(Multiview, multiviewAlgoName) + # multiviewArguments[] + # if "Fusion" in benchmark["Multiview"]: + # for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]: + # import pdb; pdb.set_trace() + # if args.FU_cl_names != ['']: + # monoClassifiers = args.FU_cl_names + # monoClassifiersConfigs = [globals()[classifier + "KWARGS"] for classifier in monoClassifiers] + # if args.FU_method_config != [""]: + # fusionMethodConfigs = [map(float, config.split(":")) for config in args.FU_method_config] + # elif not hyperParamSearch: + # raise ValueError("No config for fusion method given and no gridearch wanted") + # else: + # try: + # 
fusionMethodConfigs = [["config"] for method in + # benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]] + # except: + # pass + # try: + # for methodIndex, method in enumerate(benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]): + # if args.FU_fixed: + # arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, + # args.FU_cl_names, monoClassifiersConfigs, + # fusionMethodConfigs[methodIndex]) + # argumentDictionaries["Multiview"].append(arguments) + # else: + # for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), + # NB_VIEW): + # monoClassifiersNamesComb = [monoClassifiers[index] for index in combination] + # monoClassifiersConfigsComb = [monoClassifiersConfigs[index] for index in + # combination] + # arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, + # monoClassifiersNamesComb, monoClassifiersConfigsComb, + # fusionMethodConfigs[methodIndex]) + # argumentDictionaries["Multiview"].append(arguments) + # except: + # pass + # else: + # if "LateFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \ + # "Classifiers" in benchmark["Multiview"]["Fusion"]: + # bestClassifiers = [] + # bestClassifiersConfigs = [] + # if argumentDictionaries["Monoview"] != {}: + # for viewIndex, view in enumerate(views): + # if metricModule.getConfig()[-14] == "h": + # bestClassifiers.append( + # classifiersNames[viewIndex][np.argmax(np.array(scores[viewIndex]))]) + # bestClassifiersConfigs.append( + # classifiersConfigs[viewIndex][np.argmax(np.array(scores[viewIndex]))]) + # else: + # bestClassifiers.append( + # classifiersNames[viewIndex][np.argmin(np.array(scores[viewIndex]))]) + # bestClassifiersConfigs.append( + # classifiersConfigs[viewIndex][np.argmin(np.array(scores[viewIndex]))]) + # else: + # raise AttributeError("No Monoview classifiers asked in args and no monoview benchmark done.") + # for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]: + # arguments = lateFusionSetArgs(views, viewsIndices, args.CL_classes, method, + # bestClassifiers, bestClassifiersConfigs, + # fusionMethodConfig) + # argumentDictionaries["Multiview"].append(arguments) + # if "EarlyFusion" in benchmark["Multiview"]["Fusion"]["Methods"] and \ + # "Classifiers" in benchmark["Multiview"]["Fusion"]: + # for method in benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"]: + # for classifier in benchmark["Multiview"]["Fusion"]["Classifiers"]: + # arguments = {"CL_type": "Fusion", + # "views": views, + # "NB_VIEW": len(views), + # "viewsIndices": viewsIndices, + # "NB_CLASS": len(args.CL_classes), + # "LABELS_NAMES": args.CL_classes, + # "FusionKWARGS": {"fusionType": "EarlyFusion", "fusionMethod": method, + # "classifiersNames": [classifier], + # "classifiersConfigs": [ + # initKWARGS[classifier + "KWARGSInit"]], + # 'fusionMethodConfig': fusionMethodConfig, + # "nbView": (len(viewsIndices))}} + # argumentDictionaries["Multiview"].append(arguments) + # if "Mumbo" in benchmark["Multiview"]: + # for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), + # NB_VIEW): + # mumboClassifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination] + # arguments = {"CL_type": "Mumbo", + # "views": views, + # "NB_VIEW": len(views), + # "viewsIndices": viewsIndices, + # "NB_CLASS": len(args.CL_classes), + # "LABELS_NAMES": args.CL_classes, + # "MumboKWARGS": {"classifiersNames": mumboClassifiersNames, + # "maxIter": int(args.MU_iter[0]), "minIter": 
int(args.MU_iter[1]), + # "threshold": args.MU_iter[2], + # "classifiersConfigs": [argument.split(":") for argument in + # args.MU_config], "nbView": (len(viewsIndices))}} + # argumentDictionaries["Multiview"].append(arguments) + argumentDictionaries["Multiview"] = multiviewArguments return argumentDictionaries @@ -415,61 +439,66 @@ groupClass.add_argument('--CL_metric_princ', metavar='STRING', action='store', help='Determine which metric to use for randomSearch and optimization' , default="f1_score") groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store', help='Determine how many Randomized grid search tests to do', type=int, default=2) -groupClass.add_argument('--CL_GS_type', metavar='STRING', action='store', +groupClass.add_argument('--CL_HPS_type', metavar='STRING', action='store', help='Determine which hyperparamter search function use', default="randomizedSearch") groupRF = parser.add_argument_group('Random Forest arguments') -groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='Number max trees', - default='25') -groupRF.add_argument('--CL_RF_max_depth', metavar='STRING', action='store', help='Max depth for the trees', - default='5') +groupRF.add_argument('--CL_RandomForest_trees', metavar='INT', type=int, action='store', help='Number max trees', + default=25) +groupRF.add_argument('--CL_RandomForest_max_depth', metavar='INT', type=int, action='store', help='Max depth for the trees', + default=5) groupSVMLinear = parser.add_argument_group('Linear SVM arguments') -groupSVMLinear.add_argument('--CL_SVML_C', metavar='STRING', action='store', help='Penalty parameter used', - default='1') +groupSVMLinear.add_argument('--CL_SVMLinear_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + default=1) groupSVMRBF = parser.add_argument_group('SVW-RBF arguments') -groupSVMRBF.add_argument('--CL_SVMR_C', metavar='STRING', action='store', help='Penalty parameter used', - default='1') +groupSVMRBF.add_argument('--CL_SVMRBF_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + default=1) groupSVMPoly = parser.add_argument_group('Poly SVM arguments') -groupSVMPoly.add_argument('--CL_SVMP_C', metavar='STRING', action='store', help='Penalty parameter used', - default='1') -groupSVMPoly.add_argument('--CL_SVMP_deg', metavar='STRING', action='store', help='Degree parameter used', - default='2') +groupSVMPoly.add_argument('--CL_SVMPoly_C', metavar='INT', type=int, action='store', help='Penalty parameter used', + default=1) +groupSVMPoly.add_argument('--CL_SVMPoly_deg', metavar='INT', type=int, action='store', help='Degree parameter used', + default=2) groupAdaboost = parser.add_argument_group('Adaboost arguments') -groupAdaboost.add_argument('--CL_Ada_n_est', metavar='STRING', action='store', help='Number of estimators', - default='2') -groupAdaboost.add_argument('--CL_Ada_b_est', metavar='STRING', action='store', help='Estimators', +groupAdaboost.add_argument('--CL_Adaboost_n_est', metavar='INT', type=int, action='store', help='Number of estimators', + default=2) +groupAdaboost.add_argument('--CL_Adaboost_b_est', metavar='STRING', action='store', help='Estimators', default='DecisionTreeClassifier') -groupRF = parser.add_argument_group('Decision Trees arguments') -groupRF.add_argument('--CL_DT_depth', metavar='STRING', action='store', - help='Determine max depth for Decision Trees', default='3') +groupDT = parser.add_argument_group('Decision Trees arguments') +groupDT.add_argument('--CL_DecisionTree_depth', 
metavar='INT', type=int, action='store', + help='Determine max depth for Decision Trees', default=3) groupSGD = parser.add_argument_group('SGD arguments') -groupSGD.add_argument('--CL_SGD_alpha', metavar='STRING', action='store', - help='Determine alpha for SGDClassifier', default='0.1') +groupSGD.add_argument('--CL_SGD_alpha', metavar='FLOAT', type=float, action='store', + help='Determine alpha for SGDClassifier', default=0.1) groupSGD.add_argument('--CL_SGD_loss', metavar='STRING', action='store', help='Determine loss for SGDClassifier', default='log') groupSGD.add_argument('--CL_SGD_penalty', metavar='STRING', action='store', help='Determine penalty for SGDClassifier', default='l2') -groupSGD = parser.add_argument_group('KNN arguments') -groupSGD.add_argument('--CL_KNN_neigh', metavar='STRING', action='store', - help='Determine number of neighbors for KNN', default='1') +groupKNN = parser.add_argument_group('KNN arguments') +groupKNN.add_argument('--CL_KNN_neigh', metavar='INT', type=int, action='store', + help='Determine number of neighbors for KNN', default=1) + +groupSCM = parser.add_argument_group('SCM arguments') +groupSCM.add_argument('--CL_SCM_max_rules', metavar='INT', type=int, action='store', + help='Max number of rules for SCM', default=1) +groupSCM.add_argument('--CL_SCM_p', metavar='FLOAT', type=float, action='store', + help='Max number of rules for SCM', default=1.0) +groupSCM.add_argument('--CL_SCM_model_type', metavar='STRING', action='store', + help='Max number of rules for SCM', default="conjunction") -groupSGD = parser.add_argument_group('SCM arguments') -groupSGD.add_argument('--CL_SCM_max_rules', metavar='STRING', action='store', - help='Max number of rules for SCM', default='1') groupMumbo = parser.add_argument_group('Mumbo arguments') -groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', - help='Determine which monoview classifier to use with Mumbo', default='DecisionTree') +groupMumbo.add_argument('--MU_types', metavar='STRING', action='store', nargs="+", + help='Determine which monoview classifier to use with Mumbo', default=['DecisionTree', 'DecisionTree', 'DecisionTree']) groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='+', help='Configuration for the monoview classifier in Mumbo', - default=['']) + default=['2:0.5', '2:0.5', '2:0.5']) groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, help='Max number of iteration, min number of iteration, convergence threshold', type=float, default=[10, 1, 0.01]) @@ -477,21 +506,34 @@ groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3, groupFusion = parser.add_argument_group('Fusion arguments') groupFusion.add_argument('--FU_types', metavar='STRING', action='store', nargs="+", help='Determine which type of fusion to use', - default=['LateFusion','EarlyFusion']) -groupFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+", + default=['']) +groupEarlyFusion = parser.add_argument_group('Early Fusion arguments') +groupEarlyFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', nargs="+", help='Determine which early fusion method of fusion to use', default=['']) -groupFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+", +groupEarlyFusion.add_argument('--FU_E_method_configs', metavar='STRING', action='store', nargs='+', + help='Configuration for the early fusion methods separate method by space and values by :', + default=['']) 
+groupEarlyFusion.add_argument('--FU_E_cl_config', metavar='STRING', action='store', nargs='+', + help='Configuration for the monoview classifiers used separate classifier by space ' + 'and configs must be of form argument1_name:value,argument2_name:value', + default=['']) +groupEarlyFusion.add_argument('--FU_E_cl_names', metavar='STRING', action='store', nargs='+', + help='Name of the classifiers used for each early fusion method', default=['']) + + +groupLateFusion = parser.add_argument_group('Late Early Fusion arguments') +groupLateFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', nargs="+", help='Determine which late fusion method of fusion to use', default=['']) -groupFusion.add_argument('--FU_method_config', metavar='STRING', action='store', nargs='+', +groupLateFusion.add_argument('--FU_L_method_config', metavar='STRING', action='store', nargs='+', help='Configuration for the fusion method', default=['']) -groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nargs='+', +groupLateFusion.add_argument('--FU_L_cl_config', metavar='STRING', action='store', nargs='+', help='Configuration for the monoview classifiers used', default=['']) -groupFusion.add_argument('--FU_cl_names', metavar='STRING', action='store', nargs="+", +groupLateFusion.add_argument('--FU_L_cl_names', metavar='STRING', action='store', nargs="+", help='Names of the classifier used for late fusion', default=['']) -groupFusion.add_argument('--FU_fixed', action='store_true', - help='Determine if you want fusion for the monoview classifier in the same order as written') +groupLateFusion.add_argument('--FU_L_select_monoview', metavar='STRING', action='store', + help='Determine which method to use to select the monoview classifiers', default="intersect") args = parser.parse_args() os.nice(args.nice) @@ -504,10 +546,7 @@ if args.name not in ["MultiOmic", "ModifiedMultiOmic", "Caltech", "Fake", "Plaus else: getDatabase = getattr(DB, "get" + args.name + "DB" + args.type[1:]) -try: - gridSearch = args.CL_GS_type -except: - gridSearch = False +hyperParamSearch = args.CL_HPS_type directory = initLogFile(args) @@ -537,12 +576,13 @@ logging.info("Start:\t Finding all available mono- & multiview algorithms") benchmark = initBenchmark(args) -fusionMethodConfig = [args.FU_method_config[0].split(":"), "b"] +# fusionMethodConfig = [args.FU_method_config[0].split(":"), "b"] -initKWARGS = initKWARGS(args) +initKWARGS = initKWARGS(args, benchmark) dataBaseTime = time.time() - start -argumentDictionaries = {"Monoview": {}, "Multiview": []} + +argumentDictionaries = {"Monoview": [], "Multiview": []} argumentDictionaries = initMonoviewArguments(benchmark, argumentDictionaries, views, allViews, DATASET, NB_CLASS, initKWARGS) @@ -555,7 +595,7 @@ if nbCores > 1: for stepIndex in range(int(math.ceil(float(nbExperiments) / nbCores))): resultsMonoview += (Parallel(n_jobs=nbCores)( delayed(ExecMonoview_multicore)(directory, args.name, labelsNames, args.CL_split, args.CL_nbFolds, - coreIndex, args.type, args.pathF, statsIter, gridSearch=gridSearch, + coreIndex, args.type, args.pathF, statsIter, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Monoview"][coreIndex + stepIndex * nbCores]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores)))) @@ -570,7 +610,7 @@ else: resultsMonoview += ([ExecMonoview(directory, DATASET.get("View" + str(arguments["viewIndex"])), DATASET.get("Labels").value, args.name, labelsNames, args.CL_split, 
args.CL_nbFolds, 1, args.type, args.pathF, statsIter, - gridSearch=gridSearch, metrics=metrics, nIter=args.CL_GS_iter, + hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in argumentDictionaries["Monoview"]]) scores = [[result[1][2][metrics[0][0]][1] for result in resultsMonoview if result[0] == viewIndex] for viewIndex @@ -581,8 +621,9 @@ else: viewsIndices] monoviewTime = time.time() - dataBaseTime - start + argumentDictionaries = initMultiviewArguments(args, benchmark, views, viewsIndices, scores, classifiersConfigs, - classifiersNames, fusionMethodConfig, NB_VIEW, metrics[0]) + classifiersNames, NB_VIEW, metrics[0], argumentDictionaries) if nbCores > 1: resultsMultiview = [] @@ -591,14 +632,14 @@ if nbCores > 1: resultsMultiview += Parallel(n_jobs=nbCores)( delayed(ExecMultiview_multicore)(directory, coreIndex, args.name, args.CL_split, args.CL_nbFolds, args.type, args.pathF, - LABELS_DICTIONARY, statsIter, gridSearch=gridSearch, + LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **argumentDictionaries["Multiview"][stepIndex * nbCores + coreIndex]) for coreIndex in range(min(nbCores, nbExperiments - stepIndex * nbCores))) else: resultsMultiview = [ ExecMultiview(directory, DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, - LABELS_DICTIONARY, statsIter, gridSearch=gridSearch, + LABELS_DICTIONARY, statsIter, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=args.CL_GS_iter, **arguments) for arguments in argumentDictionaries["Multiview"]] multiviewTime = time.time() - monoviewTime - dataBaseTime - start diff --git a/Code/MonoMutliViewClassifiers/Metrics/__init__.py b/Code/MonoMutliViewClassifiers/Metrics/__init__.py index 9bbd76fb..2e9bab08 100644 --- a/Code/MonoMutliViewClassifiers/Metrics/__init__.py +++ b/Code/MonoMutliViewClassifiers/Metrics/__init__.py @@ -4,4 +4,28 @@ for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): continue __import__(module[:-3], locals(), globals()) del module -del os \ No newline at end of file +del os + +""" +To be able to add another metric to the benchmark you must : + +Create a .py file named after the metric +Define a score function + Input : + y_true : np array with the real labels + y_pred : np array with the predicted labels + kwargs : every argument that is specific to the metric + Returns: + score : the metric's score (float) +Define a get_scorer function + Input : + kwargs : every argument that is specific to the metric + Returns : + scorer : an object similar to an sk-learn scorer +Define a getConfig function + Input : + kwargs : every argument that is specific to the metric + Output : + configString : A string that gives the name of the metric and explains how it is configured. 
Must end by + (lower is better) or (higher is better) to be able to analyze the results +""" \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py index 949d48f1..f4a2f3ea 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py +++ b/Code/MonoMutliViewClassifiers/Monoview/ExecClassifMonoView.py @@ -16,7 +16,7 @@ from sklearn import metrics # For stastics on classification import h5py # Import own modules -import ClassifMonoView # Functions for classification +import MonoviewUtils # Functions for classification import ExportResults # Functions to render results import MonoviewClassifiers import Metrics @@ -29,7 +29,7 @@ __status__ = "Prototype" # Production, Development, Prototype __date__ = 2016-03-25 -def ExecMonoview_multicore(directory, name, labelsNames, learningRate, nbFolds, datasetFileIndex, databaseType, path, statsIter, gridSearch=True, +def ExecMonoview_multicore(directory, name, labelsNames, learningRate, nbFolds, datasetFileIndex, databaseType, path, statsIter, hyperParamSearch="randomizedSearch", metrics=[["accuracy_score", None]], nIter=30, **args): DATASET = h5py.File(path+name+str(datasetFileIndex)+".hdf5", "r") kwargs = args["args"] @@ -37,12 +37,12 @@ def ExecMonoview_multicore(directory, name, labelsNames, learningRate, nbFolds, neededViewIndex = views.index(kwargs["feat"]) X = DATASET.get("View"+str(neededViewIndex)) Y = DATASET.get("Labels").value - return ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, gridSearch=gridSearch, + return ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, 1, databaseType, path, statsIter, hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **args) -def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databaseType, path, statsIter, gridSearch=True, - metrics=[["accuracy_score", None]], nIter=30, **args): +def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCores, databaseType, path, statsIter, hyperParamSearch="randomizedSearch", + metrics=[["accuracy_score", None]], nIter=30, **args): logging.debug("Start:\t Loading data") try: kwargs = args["args"] @@ -54,7 +54,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCo nbClass = kwargs["nbClass"] X = getValue(X) datasetLength = X.shape[0] - clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"] + logging.debug("Done:\t Loading data") # Determine the Database to extract features logging.debug("Info:\t Classification - Database:" + str(name) + " Feature:" + str(feat) + " train_size:" + str(learningRate) + ", CrossValidation k-folds:" + str(nbFolds) + ", cores:" + str(nbCores)+", algorithm : "+CL_type) @@ -66,7 +66,7 @@ def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCo for iterationStat in range(statsIter): # Calculate Train/Test data logging.debug("Start:\t Determine Train/Test split"+" for iteration "+str(iterationStat+1)) - testIndices = ClassifMonoView.splitDataset(Y, nbClass, learningRate, datasetLength) + testIndices = MonoviewUtils.splitDataset(Y, nbClass, learningRate, datasetLength) trainIndices = [i for i in range(datasetLength) if i not in testIndices] X_train = extractSubset(X,trainIndices) X_test = extractSubset(X,testIndices) @@ -80,13 +80,15 @@ def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCo # Begin Classification 
RandomForest classifierModule = getattr(MonoviewClassifiers, CL_type) - classifierGridSearch = getattr(classifierModule, "gridSearch") - if gridSearch: + if hyperParamSearch != "None": + classifierGridSearch = getattr(classifierModule, hyperParamSearch) logging.debug("Start:\t RandomSearch best settings with "+str(nIter)+" iterations for "+CL_type) cl_desc = classifierGridSearch(X_train, y_train, nbFolds=nbFolds, nbCores=nbCores, metric=metrics[0], nIter=nIter) clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc)) logging.debug("Done:\t RandomSearch best settings") + else: + clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"] logging.debug("Start:\t Training") cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores, **clKWARGS) logging.debug("Done:\t Training") @@ -107,9 +109,9 @@ def ExecMonoview(directory, X, Y, name, labelsNames, learningRate, nbFolds, nbCo logging.debug("Start:\t Getting Results") - stringAnalysis, imagesAnalysis, metricsScores = execute(name, learningRate, nbFolds, nbCores, gridSearch, metrics, nIter, feat, CL_type, - clKWARGS, labelsNames, X.shape, - y_trains, y_train_preds, y_tests, y_test_preds, t_end, statsIter) + stringAnalysis, imagesAnalysis, metricsScores = execute(name, learningRate, nbFolds, nbCores, hyperParamSearch, metrics, nIter, feat, CL_type, + clKWARGS, labelsNames, X.shape, + y_trains, y_train_preds, y_tests, y_test_preds, t_end, statsIter) cl_desc = [value for key, value in sorted(clKWARGS.iteritems())] logging.debug("Done:\t Getting Results") logging.info(stringAnalysis) diff --git a/Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py b/Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py similarity index 100% rename from Code/MonoMutliViewClassifiers/Monoview/ClassifMonoView.py rename to Code/MonoMutliViewClassifiers/Monoview/MonoviewUtils.py diff --git a/Code/MonoMutliViewClassifiers/Monoview/__init__.py b/Code/MonoMutliViewClassifiers/Monoview/__init__.py index f597921f..8e96d5a2 100644 --- a/Code/MonoMutliViewClassifiers/Monoview/__init__.py +++ b/Code/MonoMutliViewClassifiers/Monoview/__init__.py @@ -1 +1 @@ -from . import ExecClassifMonoView, ClassifMonoView \ No newline at end of file +from . 
import ExecClassifMonoView, MonoviewUtils \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py index 86bf631b..406861fe 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/Adaboost.py @@ -13,6 +13,7 @@ __status__ = "Prototype" # Production, Development, P def canProbas(): return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): num_estimators = int(kwargs['0']) base_estimators = DecisionTreeClassifier()#kwargs['1'] @@ -21,6 +22,16 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_Adaboost_n_est": + kwargsDict['0'] = int(kwargValue) + elif kwargName == "CL_Adaboost_b_est": + kwargsDict['1'] = kwargValue + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIter=30, nbCores=1): pipeline = Pipeline([('classifier', AdaBoostClassifier())]) diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py index 8c372934..3a13c587 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/DecisionTree.py @@ -20,6 +20,14 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1, **kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_DecisionTree_depth": + kwargsDict['0'] = int(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_DT = Pipeline([('classifier', DecisionTreeClassifier())]) param_DT = {"classifier__max_depth": randint(1, 30)} diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py index 6ed4dd89..234ff43e 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/KNN.py @@ -20,6 +20,14 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_KNN_neigh": + kwargsDict['0'] = int(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30 ): pipeline_KNN = Pipeline([('classifier', KNeighborsClassifier())]) param_KNN = {"classifier__n_neighbors": randint(1, 50)} diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py index 16a4646c..370f679b 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/RandomForest.py @@ -22,6 +22,16 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_RandomForest_trees": + kwargsDict['0'] = int(kwargValue) + elif kwargName == "CL_RandomForest_max_depth": + kwargsDict['1'] = kwargValue + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_rf = 
Pipeline([('classifier', RandomForestClassifier())]) param_rf = {"classifier__n_estimators": randint(1, 30), diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 423d7f84..19a46355 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -46,6 +46,19 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): pass return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_SCM_max_rules": + kwargsDict['0'] = int(kwargValue) + elif kwargName == "CL_SCM_p": + kwargsDict['1'] = int(kwargValue) + elif kwargName == "CL_SCM_model_type": + kwargsDict['2'] = kwargValue + return kwargsDict + + + def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIter=30, nbCores=1): diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py index 89125059..93fb3910 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SGD.py @@ -25,6 +25,18 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_SGD_loss": + kwargsDict['0'] = kwargValue + elif kwargName == "CL_SGD_penalty": + kwargsDict['1'] = kwargValue + elif kwargName == "CL_SGD_alpha": + kwargsDict['2'] = float(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SGD = Pipeline([('classifier', SGDClassifier())]) losses = ['log', 'modified_huber'] diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py index c6b7bbe2..0229110d 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMLinear.py @@ -20,6 +20,14 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_SVMLinear_C": + kwargsDict['0'] = int(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMLinear = Pipeline([('classifier', SVC(kernel="linear", max_iter=1000))]) param_SVMLinear = {"classifier__C":randint(1, 10000)} diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py index 1a719a83..d5506e6d 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMPoly.py @@ -21,6 +21,16 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_SVMPoly_C": + kwargsDict['0'] = int(kwargValue) + elif kwargName == "CL_SVMPoly_deg": + kwargsDict['1'] = int(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMPoly = Pipeline([('classifier', SVC(kernel="poly", max_iter=1000))]) param_SVMPoly = {"classifier__C": randint(1, 10000), 
"classifier__degree":randint(1, 30)} diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py index ed4ddda7..e232c1ba 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SVMRBF.py @@ -13,6 +13,7 @@ __status__ = "Prototype" # Production, Development, P def canProbas(): return True + def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): C = int(kwargs['0']) classifier = SVC(C=C, kernel='rbf', probability=True, max_iter=1000) @@ -20,6 +21,14 @@ def fit(DATASET, CLASS_LABELS, NB_CORES=1,**kwargs): return classifier +def getKWARGS(kwargsList): + kwargsDict = {} + for (kwargName, kwargValue) in kwargsList: + if kwargName == "CL_SVMRBF_C": + kwargsDict['0'] = int(kwargValue) + return kwargsDict + + def gridSearch(X_train, y_train, nbFolds=4, nbCores=1, metric=["accuracy_score", None], nIter=30): pipeline_SVMRBF = Pipeline([('classifier', SVC(kernel="rbf", max_iter=1000))]) param_SVMRBF = {"classifier__C": randint(1, 10000)} diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/__init__.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/__init__.py index 9bbd76fb..caed9645 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/__init__.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/__init__.py @@ -4,4 +4,20 @@ for module in os.listdir(os.path.dirname(os.path.realpath(__file__))): continue __import__(module[:-3], locals(), globals()) del module -del os \ No newline at end of file +del os + +""" +To be able to add a Monoview Classifier to the benchmark, one has to : +Create a .py file named after the classifier +Define a canProbas function returning True or False whether the classifier is able to predict class probabilities +Define a fit function + Input : + DATASET : The data matrix used to fit the classifier + CLASS_LABELS : The labels' array of the training set + NB_CORES : The number of cores the classifier can use to train + kwargs : Any argument specific to the classifier + Output : + classifier : A classifier object, similar to the sk-learn classifier object +Define a + +""" \ No newline at end of file diff --git a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py index 42903449..77e14c20 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py +++ b/Code/MonoMutliViewClassifiers/Multiview/ExecMultiview.py @@ -21,14 +21,14 @@ __status__ = "Prototype" # Production, Development, Pr def ExecMultiview_multicore(directory, coreIndex, name, learningRate, nbFolds, databaseType, path, LABELS_DICTIONARY, statsIter, - gridSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): + hyperParamSearch=False, nbCores=1, metrics=None, nIter=30, **arguments): DATASET = h5py.File(path+name+str(coreIndex)+".hdf5", "r") return ExecMultiview(directory, DATASET, name, learningRate, nbFolds, 1, databaseType, path, LABELS_DICTIONARY, statsIter, - gridSearch=gridSearch, metrics=metrics, nIter=nIter, **arguments) + hyperParamSearch=hyperParamSearch, metrics=metrics, nIter=nIter, **arguments) def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, databaseType, path, LABELS_DICTIONARY, statsIter, - gridSearch=False, metrics=None, nIter=30, **kwargs): + hyperParamSearch=False, metrics=None, nIter=30, **kwargs): datasetLength = DATASET.get("Metadata").attrs["datasetLength"] NB_VIEW = kwargs["NB_VIEW"] @@ -59,8 +59,7 @@ def 
ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data classifiersIterations = [] classifierPackage = globals()[CL_type] # Permet d'appeler un module avec une string classifierModule = getattr(classifierPackage, CL_type) - # classifierClass = getattr(classifierModule, CL_type) - # classifierGridSearch = getattr(classifierModule, "gridSearch_hdf5") + classifierClass = getattr(classifierModule, CL_type) analysisModule = getattr(classifierPackage, "analyzeResults") logging.info("Start:\t Determine validation split for ratio " + str(learningRate)) @@ -84,7 +83,11 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data # logging.info("Start:\t Learning with " + CL_type + " and " + str(len(kFolds)) + " folds") # logging.info("Start:\t Classification") # Begin Classification - classifier = searchBestSettings(DATASET, CL_type, metrics, iLearningIndices, iKFolds,viewsIndices=viewsIndices, searchingTool=gridSearch, nIter=nIter, **classificationKWARGS) + if hyperParamSearch != "None": + classifier = searchBestSettings(DATASET, CL_type, metrics, iLearningIndices, iKFolds, viewsIndices=viewsIndices, searchingTool=hyperParamSearch, nIter=nIter, **classificationKWARGS) + else: + classifier = classifierClass(NB_CORES=nbCores, **classificationKWARGS) + # classifier.setParams(classificationKWARGS) for _ in range(statsIter): classifier.fit_hdf5(DATASET, trainIndices=learningIndices, viewsIndices=viewsIndices) trainLabels = classifier.predict_hdf5(DATASET, usedIndices=learningIndices, viewsIndices=viewsIndices) @@ -106,9 +109,9 @@ def ExecMultiview(directory, DATASET, name, learningRate, nbFolds, nbCores, data stringAnalysis, imagesAnalysis, metricsScores = analysisModule.execute(classifiersIterations, trainLabelsIterations, testLabelsIterations, DATASET, classificationKWARGS, learningRate, - LABELS_DICTIONARY,views, nbCores, times, + LABELS_DICTIONARY, views, nbCores, times, name, nbFolds, ivalidationIndices, - gridSearch, nIter, metrics, statsIter, + hyperParamSearch, nIter, metrics, statsIter, viewsIndices) labelsSet = set(LABELS_DICTIONARY.values()) logging.info(stringAnalysis) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py index 62c9d41a..2790bf71 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Fusion.py @@ -10,6 +10,19 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype +def getArgs(args, benchmark, views, viewsIndices): + if not "Monoview" in benchmark and not args.FU_L_select_monoview in ["randomClf", "Determined"]: + args.FU_L_select_monoview = "randomClf" + argumentsList = [] + # import pdb; pdb.set_trace() + for fusionType in benchmark["Multiview"]["Fusion"]["Methods"]: + fusionTypePackage = globals()[fusionType+"Package"] + for fusionMethod in benchmark["Multiview"]["Fusion"]["Methods"][fusionType]: + fusionMethodModule = getattr(fusionTypePackage, fusionMethod) + arguments = fusionMethodModule.getArgs(args, views, viewsIndices) + argumentsList+= arguments + return argumentsList + def makeMonoviewData_hdf5(DATASET, weights=None, usedIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): @@ -47,7 +60,7 @@ def gridSearch_hdf5(DATASET, viewsIndices, classificationKWARGS, learningIndices for classifierIndex, classifierName in enumerate(classifiersNames): logging.debug("\tStart:\t Random search for "+classifierName+ " with "+str(nIter)+" 
iterations") classifierModule = getattr(MonoviewClassifiers, classifierName) - classifierMethod = getattr(classifierModule, "gridSearch") + classifierMethod = getattr(classifierModule, "hyperParamSearch") if fusionTypeName == "LateFusion": bestSettings.append(classifierMethod(getV(DATASET, viewsIndices[classifierIndex], learningIndices), DATASET.get("Labels")[learningIndices], metric=metric, diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index a87bab42..cb17c4c7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -7,14 +7,14 @@ from utils.Dataset import getV class EarlyFusionClassifier(object): def __init__(self, monoviewClassifierName, monoviewClassifierConfig, NB_CORES=1): - self.monoviewClassifierName = monoviewClassifierName[0] - if type(monoviewClassifierConfig[0])==dict: + self.monoviewClassifierName = monoviewClassifierName + if type(monoviewClassifierConfig)==dict: pass else: - monoviewClassifierConfig[0] = dict((str(configIndex), config[0]) for configIndex, config in + monoviewClassifierConfig = dict((str(configIndex), config[0]) for configIndex, config in enumerate(monoviewClassifierConfig )) - self.monoviewClassifiersConfig = monoviewClassifierConfig[0] + self.monoviewClassifiersConfig = monoviewClassifierConfig self.monoviewClassifier = None self.nbCores = NB_CORES self.monoviewData = None diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index 83edaf8b..624ed6f7 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -14,6 +14,27 @@ def genParamsSets(classificationKWARGS, nIter=1): return paramsSets +def getArgs(args, views, viewsIndices): + argumentsList = [] + for classifierName, classifierConfig in zip(args.FU_E_cl_names, args.FU_E_cl_config): + monoviewClassifierModule = getattr(MonoviewClassifiers, classifierName) + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "EarlyFusion", + "fusionMethod": "WeightedLinear", + "classifiersNames": classifierName, + "classifiersConfigs": monoviewClassifierModule.getKWARGS([arg.split(":") + for arg in + classifierConfig.split(",")]), + 'fusionMethodConfig': args.FU_E_method_configs, + "nbView": (len(viewsIndices))}} + argumentsList.append(arguments) + return argumentsList + # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # if type(viewsIndices)==type(None): # viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) @@ -42,9 +63,9 @@ class WeightedLinear(EarlyFusionClassifier): if kwargs['fusionMethodConfig'][0]==None: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) elif kwargs['fusionMethodConfig'][0]==['']: - pass + self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) else: - self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) + self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if 
type(viewsIndices)==type(None): @@ -87,8 +108,8 @@ class WeightedLinear(EarlyFusionClassifier): def getConfig(self, fusionMethodConfig ,monoviewClassifiersNames, monoviewClassifiersConfigs): configString = "with weighted concatenation, using weights : "+", ".join(map(str, self.weights))+ \ " with monoview classifier : " - monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames[0]) - configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs[0]) + monoviewClassifierModule = getattr(MonoviewClassifiers, monoviewClassifiersNames) + configString += monoviewClassifierModule.getConfig(monoviewClassifiersConfigs) return configString def gridSearch(self, classificationKWARGS): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py index 63d870cf..9b616adf 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusion.py @@ -19,24 +19,33 @@ def fitMonoviewClassifier(classifierName, data, labels, classifierConfig, needPr ))) return classifier + def getAccuracies(LateFusionClassifiers): return "" +def Intersect(resMono): + pass + + + class LateFusionClassifier(object): - def __init__(self, monoviewClassifiersNames, monoviewClassifiersConfigs, NB_CORES=1): + def __init__(self, monoviewClassifiersNames, monoviewClassifiersConfigs, monoviewSelection, NB_CORES=1): self.monoviewClassifiersNames = monoviewClassifiersNames self.monoviewClassifiersConfigs = monoviewClassifiersConfigs self.monoviewClassifiers = [] self.nbCores = NB_CORES self.accuracies = np.zeros(len(monoviewClassifiersNames)) self.needProbas = False + self.monoviewSelection = monoviewSelection def fit_hdf5(self, DATASET, trainIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) if trainIndices == None: trainIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + monoviewSelectionMethod = locals()[self.monoviewSelection] + self.monoviewClassifiers = monoviewSelectionMethod() self.monoviewClassifiers = Parallel(n_jobs=self.nbCores)( delayed(fitMonoviewClassifier)(self.monoviewClassifiersNames[index], getV(DATASET, viewIndex, trainIndices), diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py index 88628f66..c80dd48c 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/BayesianInference.py @@ -13,6 +13,36 @@ def genParamsSets(classificationKWARGS, nIter=1): normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) paramsSets.append([normalizedArray]) return paramsSets + + +# def getArgs(args, benchmark): +# classifiersNames = args.FU_cl_names +# classifiersConfig = [getattr(MonoviewClassifiers, name).getKWARGS([arg.split(":") +# for arg in config.split(";")]) +# for config, name in zip(args.FU_cl_config, classifiersNames)] +# fusionMethodConfig = args.FU_method_config +# return classifiersNames, classifiersConfig, fusionMethodConfig + +def getArgs(args, views, viewsIndices): + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + arguments = {"CL_type": "Fusion", + "views": 
views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "LateFusion", + "fusionMethod": "BayesianInference", + "classifiersNames": args.FU_L_cl_names, + "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") + for arg in + classifierConfig.split(",")]) + for monoviewClassifierModule,classifierConfig + in zip(monoviewClassifierModules,args.FU_L_cl_config)], + 'fusionMethodConfig': args.FU_L_method_config, + 'monoviewSelection': args.FU_L_select_monoview, + "nbView": (len(viewsIndices))}} + return [arguments] # # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # if type(viewsIndices)==type(None): @@ -37,12 +67,16 @@ def genParamsSets(classificationKWARGS, nIter=1): class BayesianInference(LateFusionClassifier): def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) # self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) - self.weights = None #A modifier !! + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig']==['']: + self.weights = [1.0 for classifier in kwargs['classifiersNames']] + else: + self.weights = np.array(map(float, kwargs['fusionMethodConfig'])) self.needProbas = True + def setParams(self, paramsSet): self.weights = paramsSet[0] diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py index 9ea51595..086c6405 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/MajorityVoting.py @@ -14,6 +14,28 @@ def genParamsSets(classificationKWARGS, nIter=1): paramsSets.append([normalizedArray]) return paramsSets + +def getArgs(args, views, viewsIndices): + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "LateFusion", + "fusionMethod": "BayesianInference", + "classifiersNames": args.FU_L_cl_names, + "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") + for arg in + classifierConfig.split(";")]) + for monoviewClassifierModule,classifierConfig + in zip(args.FU_L_cl_config,monoviewClassifierModules)], + 'fusionMethodConfig': args.FU_L_method_config[0], + 'monoviewSelection': args.FU_L_select_monoview, + "nbView": (len(viewsIndices))}} + return [arguments] + # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # if type(viewsIndices)==type(None): # viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) @@ -37,12 +59,10 @@ def genParamsSets(classificationKWARGS, nIter=1): class MajorityVoting(LateFusionClassifier): def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], 
kwargs["monoviewSelection"], NB_CORES=NB_CORES) - if kwargs['fusionMethodConfig'][0]==None: + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float) - elif kwargs['fusionMethodConfig'][0]==['']: - pass else: self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0])) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index 6af65564..e91a87af 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -26,6 +26,27 @@ def genParamsSets(classificationKWARGS, nIter=1): return paramsSets +def getArgs(args, views, viewsIndices): + monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names] + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes), + "LABELS_NAMES": args.CL_classes, + "FusionKWARGS": {"fusionType": "LateFusion", + "fusionMethod": "BayesianInference", + "classifiersNames": args.FU_L_cl_names, + "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":") + for arg in + classifierConfig.split(";")]) + for monoviewClassifierModule,classifierConfig + in zip(args.FU_L_cl_config,monoviewClassifierModules)], + 'fusionMethodConfig': args.FU_L_method_config[0], + 'monoviewSelection': args.FU_L_select_monoview, + "nbView": (len(viewsIndices))}} + return [arguments] + # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): # if type(viewsIndices)==type(None): # viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) @@ -52,14 +73,20 @@ def genParamsSets(classificationKWARGS, nIter=1): class SCMForLinear(LateFusionClassifier): def __init__(self, NB_CORES=1, **kwargs): - LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], + LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"], NB_CORES=NB_CORES) self.SCMClassifier = None - self.config = kwargs['fusionMethodConfig'][0] - self.p = None - self.maxAttributes = None - self.order = None - self.modelType = None + # self.config = kwargs['fusionMethodConfig'][0] + if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']: + self.p = 1 + self.maxAttributes = 5 + self.order = 1 + self.modelType = "conjunction" + else: + self.p = kwargs['fusionMethodConfig'][0] + self.maxAttributes = kwargs['fusionMethodConfig'][1] + self.order = kwargs['fusionMethodConfig'][2] + self.modelType = kwargs['fusionMethodConfig'][3] def setParams(self, paramsSet): self.p = paramsSet[0] @@ -105,14 +132,14 @@ class SCMForLinear(LateFusionClassifier): def SCMForLinearFusionFit(self, DATASET, usedIndices=None, viewsIndices=None): if type(viewsIndices)==type(None): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) - if self.p is None: - self.p = float(self.config[0]) - if self.maxAttributes is None: - self.maxAttributes = int(self.config[1]) - if self.modelType is None: - self.modelType = self.config[2] - if self.order is None: - self.order = self.config[3] + # if self.p is None: + # self.p = float(self.config[0]) + # if self.maxAttributes is None: + # 
+        # if self.modelType is None:
+        #     self.modelType = self.config[2]
+        # if self.order is None:
+        #     self.order = self.config[3]
         nbView = len(viewsIndices)
         self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=self.p, max_attributes=self.maxAttributes,
                                                           model_type=self.modelType, verbose=False)
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py
index 894dc7fe..1f2f1ad5 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SVMForLinear.py
@@ -16,10 +16,30 @@ def genParamsSets(classificationKWARGS, nIter=1):
 
 # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
 #     return None
 
+def getArgs(args, views, viewsIndices):
+    monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names]
+    arguments = {"CL_type": "Fusion",
+                 "views": views,
+                 "NB_VIEW": len(views),
+                 "viewsIndices": viewsIndices,
+                 "NB_CLASS": len(args.CL_classes),
+                 "LABELS_NAMES": args.CL_classes,
+                 "FusionKWARGS": {"fusionType": "LateFusion",
+                                  "fusionMethod": "SVMForLinear",
+                                  "classifiersNames": args.FU_L_cl_names,
+                                  "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":")
+                                                                                             for arg in
+                                                                                             classifierConfig.split(";")])
+                                                         for monoviewClassifierModule,classifierConfig
+                                                         in zip(monoviewClassifierModules,args.FU_L_cl_config)],
+                                  'fusionMethodConfig': args.FU_L_method_config[0],
+                                  'monoviewSelection': args.FU_L_select_monoview,
+                                  "nbView": (len(viewsIndices))}}
+    return [arguments]
+
 class SVMForLinear(LateFusionClassifier):
     def __init__(self, NB_CORES=1, **kwargs):
-        LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'],
+        LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"],
                                       NB_CORES=NB_CORES)
         self.SVMClassifier = None
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py
index caf01b88..e81ebb84 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/WeightedLinear.py
@@ -15,6 +15,27 @@ def genParamsSets(classificationKWARGS, nIter=1):
     return paramsSets
 
+
+def getArgs(args, views, viewsIndices):
+    monoviewClassifierModules = [getattr(MonoviewClassifiers, classifierName) for classifierName in args.FU_L_cl_names]
+    arguments = {"CL_type": "Fusion",
+                 "views": views,
+                 "NB_VIEW": len(views),
+                 "viewsIndices": viewsIndices,
+                 "NB_CLASS": len(args.CL_classes),
+                 "LABELS_NAMES": args.CL_classes,
+                 "FusionKWARGS": {"fusionType": "LateFusion",
+                                  "fusionMethod": "WeightedLinear",
+                                  "classifiersNames": args.FU_L_cl_names,
+                                  "classifiersConfigs": [monoviewClassifierModule.getKWARGS([arg.split(":")
+                                                                                             for arg in
+                                                                                             classifierConfig.split(";")])
+                                                         for monoviewClassifierModule,classifierConfig
+                                                         in zip(monoviewClassifierModules,args.FU_L_cl_config)],
+                                  'fusionMethodConfig': args.FU_L_method_config[0],
+                                  'monoviewSelection': args.FU_L_select_monoview,
+                                  "nbView": (len(viewsIndices))}}
+    return [arguments]
+
 # def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None):
 #     if type(viewsIndices)==type(None):
 #         viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"])
@@ -38,12 +59,10 @@ def genParamsSets(classificationKWARGS, nIter=1):
 
 class WeightedLinear(LateFusionClassifier):
     def __init__(self, NB_CORES=1, **kwargs):
-        LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'],
+        LateFusionClassifier.__init__(self, kwargs['classifiersNames'], kwargs['classifiersConfigs'], kwargs["monoviewSelection"],
                                       NB_CORES=NB_CORES)
-        if kwargs['fusionMethodConfig'][0]==None:
+        if kwargs['fusionMethodConfig'][0]==None or kwargs['fusionMethodConfig'][0]==['']:
             self.weights = np.ones(len(kwargs["classifiersNames"]), dtype=float)
-        elif kwargs['fusionMethodConfig'][0]==['']:
-            pass
         else:
             self.weights = np.array(map(float, kwargs['fusionMethodConfig'][0]))
         self.needProbas = True
diff --git a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
index 1d12dc86..31f89fbb 100644
--- a/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
+++ b/Code/MonoMutliViewClassifiers/Multiview/Mumbo/Mumbo.py
@@ -1,12 +1,14 @@
 import numpy as np
 import math
 from joblib import Parallel, delayed
+import itertools
 from Classifiers import *
 import time
 import logging
 from sklearn.metrics import accuracy_score
 from utils.Dataset import getV
 
+
 # Author-Info
 __author__ = "Baptiste Bauvin"
 __status__ = "Prototype" # Production, Development, Prototype
@@ -14,6 +16,24 @@ __status__ = "Prototype" # Production, Development, P
 
 # Data shape : ((Views, Examples, Coordinates))
 
+def getArgs(args, benchmark, views, viewsIndices):
+    argumentsList = []
+
+    arguments = {"CL_type": "Mumbo",
+                 "views": views,
+                 "NB_VIEW": len(views),
+                 "viewsIndices": viewsIndices,
+                 "NB_CLASS": len(args.CL_classes),
+                 "LABELS_NAMES": args.CL_classes,
+                 "MumboKWARGS": {"classifiersNames": args.MU_types,
+                                 "maxIter": int(args.MU_iter[0]), "minIter": int(args.MU_iter[1]),
+                                 "threshold": args.MU_iter[2],
+                                 "classifiersConfigs": [map(float, argument.split(":")) for argument in
+                                                        args.MU_config], "nbView": (len(viewsIndices))}}
+    argumentsList.append(arguments)
+    return argumentsList
+
+
 def computeWeights(DATASET_LENGTH, iterIndex, viewIndice, CLASS_LABELS, costMatrices):
     dist = np.sum(costMatrices[iterIndex, viewIndice])
     dist = dist - np.sum(np.array(
@@ -52,7 +72,7 @@ def gridSearch_hdf5(DATASET, viewIndices, classificationKWARGS, learningIndices,
     for classifierIndex, classifierName in enumerate(classifiersNames):
         logging.debug("\tStart:\t Random search for "+classifierName+" on "+DATASET.get("View"+str(viewIndices[classifierIndex])).attrs["name"])
         classifierModule = globals()[classifierName] # Allows calling a function with a string
-        classifierGridSearch = getattr(classifierModule, "gridSearch")
+        classifierGridSearch = getattr(classifierModule, "hyperParamSearch")
         bestSettings.append(classifierGridSearch(getV(DATASET, viewIndices[classifierIndex], learningIndices),
                                                  DATASET.get("Labels")[learningIndices], metric=metric))
         logging.debug("\tDone:\t Gridsearch for "+classifierName)
diff --git a/Code/MonoMutliViewClassifiers/ResultAnalysis.py b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
index e092fdd8..af37d3ec 100644
--- a/Code/MonoMutliViewClassifiers/ResultAnalysis.py
+++ b/Code/MonoMutliViewClassifiers/ResultAnalysis.py
@@ -95,18 +95,18 @@ def analyzeLabels(labelsArrays, realLabels, results, directory):
     classifiersNames = genNamesFromRes(mono, multi)
     nbClassifiers = len(classifiersNames)
     nbExamples = realLabels.shape[0]
-    nbIter = 20
+    nbIter = 2
     data = np.zeros((nbExamples, nbClassifiers*nbIter))
     tempData = np.array([labelsArray == realLabels for labelsArray in np.transpose(labelsArrays)]).astype(int)
     for classifierIndex in range(nbClassifiers):
         for iterIndex in range(nbIter):
             data[:,classifierIndex*nbIter+iterIndex] = tempData[classifierIndex,:]
-    fig = pylab.figure(figsize=(30,20))
+    fig = pylab.figure(figsize=(10,20))
     cmap = mpl.colors.ListedColormap(['red','green'])
     bounds=[-0.5,0.5,1.5]
     norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
-    cax = plt.imshow(data, interpolation='nearest', cmap=cmap, norm=norm)
+    cax = plt.imshow(data, interpolation='none', cmap=cmap, norm=norm, aspect='auto')
     plt.title('Error on examples depending on the classifier')
     ticks = np.arange(0, nbClassifiers*nbIter, nbIter)
     labels = classifiersNames
diff --git a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py
index 8dddd4c9..dec08581 100644
--- a/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py
+++ b/Code/MonoMutliViewClassifiers/utils/HyperParameterSearch.py
@@ -4,7 +4,7 @@ import sys
 import Multiview
 import Metrics
 
-def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, viewsIndices=None, searchingTool="gridSearch", nIter=1, **kwargs):
+def searchBestSettings(dataset, classifierName, metrics, iLearningIndices, iKFolds, viewsIndices=None, searchingTool="hyperParamSearch", nIter=1, **kwargs):
     if viewsIndices is None:
         viewsIndices = range(dataset.get("Metadata").attrs["nbView"])
     thismodule = sys.modules[__name__]
-- 
GitLab