diff --git a/Code/MonoMutliViewClassifiers/ExecClassif.py b/Code/MonoMutliViewClassifiers/ExecClassif.py index 97f65faebd42d3471ec93fe7cc7ccb32044c23c4..081e6de06367a3cdd054758bc86b57e91960d738 100644 --- a/Code/MonoMutliViewClassifiers/ExecClassif.py +++ b/Code/MonoMutliViewClassifiers/ExecClassif.py @@ -149,7 +149,7 @@ groupFusion = parser.add_argument_group('Fusion arguments') groupFusion.add_argument('--FU_types', metavar='STRING', action='store', help='Determine which type of fusion to use, if multiple separate with :', default='LateFusion:EarlyFusion') -groupFusion.add_argument('--FU_ealy_methods', metavar='STRING', action='store', +groupFusion.add_argument('--FU_early_methods', metavar='STRING', action='store', help='Determine which early fusion method of fusion to use, if multiple separate with :', default='') groupFusion.add_argument('--FU_late_methods', metavar='STRING', action='store', @@ -276,7 +276,7 @@ if "Multiview" in args.CL_type.strip(":"): else: benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] = args.FU_late_methods.split(":") if "EarlyFusion" in args.FU_types.split(":"): - if args.FU_late_methods.split(":") == [""]: + if args.FU_early_methods.split(":") == [""]: benchmark["Multiview"]["Fusion"]["Methods"]["EarlyFusion"] = [name for _, name, isPackage in pkgutil.iter_modules(["Multiview/Fusion/Methods/EarlyFusionPackage"]) if not isPackage] @@ -379,7 +379,7 @@ if True: except: pass - if True: + try: if benchmark["Multiview"]["Fusion"]: if args.CL_algos_monoview !=['']: monoClassifiers = args.CL_algos_monoview.split(":") @@ -389,24 +389,13 @@ if True: elif not gridSearch: raise ValueError("No config for fusion method given and no gridearch wanted") else: - fusionMethodConfigs = [["config"] for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]] - for methodIndex, method in enumerate(benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]): - if args.FU_fixed: - arguments = {"CL_type": "Fusion", - "views": views, - "NB_VIEW": len(views), - "viewsIndices": viewsIndices, - "NB_CLASS": len(args.CL_classes.split(":")), - "LABELS_NAMES": args.CL_classes.split(":"), - "FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method, - "classifiersNames": args.FU_cl_names.split(":"), - "classifiersConfigs": monoClassifiersConfigs, - 'fusionMethodConfig': fusionMethodConfigs[methodIndex]}} - argumentDictionaries["Multiview"].append(arguments) - else: - for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), NB_VIEW): - monoClassifiersNamesComb = [monoClassifiers[index] for index in combination] - monoClassifiersConfigsComb = [monoClassifiersConfigs[index] for index in combination] + try: + fusionMethodConfigs = [["config"] for method in benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]] + except: + pass + try: + for methodIndex, method in enumerate(benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"]): + if args.FU_fixed: arguments = {"CL_type": "Fusion", "views": views, "NB_VIEW": len(views), @@ -414,10 +403,27 @@ if True: "NB_CLASS": len(args.CL_classes.split(":")), "LABELS_NAMES": args.CL_classes.split(":"), "FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method, - "classifiersNames": monoClassifiersNamesComb, - "classifiersConfigs": monoClassifiersConfigsComb, + "classifiersNames": args.FU_cl_names.split(":"), + "classifiersConfigs": monoClassifiersConfigs, 'fusionMethodConfig': fusionMethodConfigs[methodIndex]}} argumentDictionaries["Multiview"].append(arguments) + else: + for combination in itertools.combinations_with_replacement(range(len(monoClassifiers)), NB_VIEW): + monoClassifiersNamesComb = [monoClassifiers[index] for index in combination] + monoClassifiersConfigsComb = [monoClassifiersConfigs[index] for index in combination] + arguments = {"CL_type": "Fusion", + "views": views, + "NB_VIEW": len(views), + "viewsIndices": viewsIndices, + "NB_CLASS": len(args.CL_classes.split(":")), + "LABELS_NAMES": args.CL_classes.split(":"), + "FusionKWARGS": {"fusionType":"LateFusion", "fusionMethod":method, + "classifiersNames": monoClassifiersNamesComb, + "classifiersConfigs": monoClassifiersConfigsComb, + 'fusionMethodConfig': fusionMethodConfigs[methodIndex]}} + argumentDictionaries["Multiview"].append(arguments) + except: + pass else: try: if benchmark["Multiview"]["Fusion"]["Methods"]["LateFusion"] and benchmark["Multiview"]["Fusion"]["Classifiers"]: @@ -458,7 +464,7 @@ if True: argumentDictionaries["Multiview"].append(arguments) except: pass - else: + except: pass else: pass diff --git a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py index 8fb459f72ac6ab46b08374a92328488fbcecdf1f..67f0ce4ba5a9404535e46b8f1b934483c8df62c9 100644 --- a/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py +++ b/Code/MonoMutliViewClassifiers/MonoviewClassifiers/SCM.py @@ -100,9 +100,9 @@ def gridSearch(X_train, y_train, nbFolds=4, metric=["accuracy_score", None], nIt def getConfig(config): try : - return "\n\t\t- SCM with max_attributes : "+str(config[0])+", c : "+str(config[1])+", p : "+str(config[2]) + return "\n\t\t- SCM with max_attributes : "+str(config[0])#+", c : "+str(config[1])+", p : "+str(config[2]) except: - return "\n\t\t- SCM with max_attributes : "+str(config["0"])+", c : "+str(config["1"])+", p : "+str(config["2"]) + return "\n\t\t- SCM with max_attributes : "+str(config["0"])#+", c : "+str(config["1"])+", p : "+str(config["2"]) def transformData(dataArray): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py index 650d4e836a3ae892fbba816a876c1a1ed73f6054..a87bab4203ed617da510d39d26024ab4832ca08c 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusion.py @@ -11,8 +11,8 @@ class EarlyFusionClassifier(object): if type(monoviewClassifierConfig[0])==dict: pass else: - monoviewClassifierConfig[0] = dict((str(configIndex), config) for configIndex, config in - enumerate(self.monoviewClassifiersConfig + monoviewClassifierConfig[0] = dict((str(configIndex), config[0]) for configIndex, config in + enumerate(monoviewClassifierConfig )) self.monoviewClassifiersConfig = monoviewClassifierConfig[0] self.monoviewClassifier = None @@ -24,11 +24,11 @@ class EarlyFusionClassifier(object): viewsIndices = np.arange(DATASET.get("Metadata").attrs["nbView"]) nbView = len(viewsIndices) if not usedIndices: - uesdIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) + usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"]) if type(weights)== type(None): weights = np.array([1/nbView for i in range(nbView)]) if sum(weights)!=1: weights = weights/sum(weights) - self.monoviewData = np.concatenate([weights[index]*getV(DATASET, viewIndex, usedIndices) + self.monoviewData = np.concatenate([getV(DATASET, viewIndex, usedIndices) for index, viewIndex in enumerate(viewsIndices)], axis=1) diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py index 9927041126f16ed9b09e16edfdb459b9a21e2387..31bdcc3596bbc70a554db1ec90440ae24637468f 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/EarlyFusionPackage/WeightedLinear.py @@ -22,7 +22,7 @@ def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndic if accuracy > bestScore: bestScore = accuracy bestConfig = normalizedArray - return [bestConfig] + return [np.array([1.0 for i in range(nbView)])] class WeightedLinear(EarlyFusionClassifier): diff --git a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py index dd7b17e7201adb3fcc6afcdb4991c860575f2c59..57888231b715addfed55322a7c20e3811cc8c810 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py +++ b/Code/MonoMutliViewClassifiers/Multiview/Fusion/Methods/LateFusionPackage/SCMForLinear.py @@ -11,6 +11,7 @@ from pyscm.utils import _unpack_binary_bytes_from_ints from math import ceil import random from sklearn.metrics import accuracy_score +import itertools def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndices=None): @@ -23,16 +24,17 @@ def gridSearch(DATASET, classificationKWARGS, trainIndices, nIter=30, viewsIndic max_attributes = random.randint(1, 20) p = random.random() model = random.choice(["conjunction", "disjunction"]) + order = random.randint(1,nbView) randomWeightsArray = np.random.random_sample(nbView) normalizedArray = randomWeightsArray/np.sum(randomWeightsArray) - classificationKWARGS["fusionMethodConfig"][0] = [p, max_attributes, model] + classificationKWARGS["fusionMethodConfig"][0] = [p, max_attributes, model, order] classifier = SCMForLinear(1, **classificationKWARGS) classifier.fit_hdf5(DATASET, trainIndices, viewsIndices=viewsIndices) predictedLabels = classifier.predict_hdf5(DATASET, trainIndices, viewsIndices=viewsIndices) accuracy = accuracy_score(DATASET.get("Labels")[trainIndices], predictedLabels) if accuracy > bestScore: bestScore = accuracy - bestConfig = [p, max_attributes, model] + bestConfig = [p, max_attributes, model, order] return [bestConfig] @@ -69,7 +71,8 @@ class SCMForLinear(LateFusionClassifier): for index, viewIndex in enumerate(viewsIndices): monoviewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) - predictedLabels = self.SCMClassifier.predict(monoviewDecisions) + features = self.generateInteractions(monoviewDecisions, order=self.order) + predictedLabels = self.SCMClassifier.predict(features) else: predictedLabels = [] return predictedLabels @@ -80,16 +83,19 @@ class SCMForLinear(LateFusionClassifier): p = float(self.config[0]) maxAttributes = int(self.config[1]) modelType = self.config[2] + self.order = self.config[3] nbView = len(viewsIndices) + self.SCMClassifier = pyscm.scm.SetCoveringMachine(p=p, max_attributes=maxAttributes, model_type=modelType, verbose=False) monoViewDecisions = np.zeros((len(usedIndices), nbView), dtype=int) for index, viewIndex in enumerate(viewsIndices): monoViewDecisions[:, index] = self.monoviewClassifiers[index].predict( getV(DATASET, viewIndex, usedIndices)) - featureSequence = [str(featureIndex) for featureIndex in range(monoViewDecisions.shape[1])] - featureIndexByRule = np.arange(monoViewDecisions.shape[1], dtype=np.uint32) + features = self.generateInteractions(monoViewDecisions, order=self.order) + featureSequence = [str(featureIndex) for featureIndex in range(features.shape[1])] + featureIndexByRule = np.arange(features.shape[1], dtype=np.uint32) binaryAttributes = LazyBaptisteRuleList(featureSequence, featureIndexByRule) - packedData = _pack_binary_bytes_to_ints(monoViewDecisions, 64) + packedData = _pack_binary_bytes_to_ints(features, 64) nameb = "temp_scm_fusion" if not os.path.isfile(nameb): dsetFile = h5py.File(nameb, "w") @@ -110,7 +116,7 @@ class SCMForLinear(LateFusionClassifier): dsetFile.close() dsetFile = h5py.File(name, "r") packedDataset = dsetFile.get("temp_scm") - attributeClassification = BaptisteRuleClassifications(packedDataset, monoViewDecisions.shape[0]) + attributeClassification = BaptisteRuleClassifications(packedDataset, features.shape[0]) self.SCMClassifier.fit(binaryAttributes, DATASET.get("Labels")[usedIndices], attribute_classifications=attributeClassification) try: dsetFile.close() @@ -118,6 +124,28 @@ class SCMForLinear(LateFusionClassifier): except: pass + def generateInteractions(self, monoViewDecisions, order=None): + if type(order)==type(None): + order = monoViewDecisions.shape[1] + genratedIntercations = [monoViewDecisions[:,i] for i in range(monoViewDecisions.shape[1])] + if order==1: + return monoViewDecisions + else: + for orderIndex in range(order-1): + combins = itertools.combinations(range(monoViewDecisions.shape[1]), orderIndex+2) + for combin in combins: + generatedDecision = monoViewDecisions[:,combin[0]] + for index in range(len(combin)-1): + if self.config[2]=="disjunction": + generatedDecision = np.logical_and(generatedDecision, monoViewDecisions[:,combin[index+1]]) + else: + generatedDecision = np.logical_or(generatedDecision, monoViewDecisions[:,combin[index+1]]) + genratedIntercations.append(generatedDecision) + return np.transpose(np.array(genratedIntercations).astype(np.uint8)) + + + + def getConfig(self, fusionMethodConfig, monoviewClassifiersNames,monoviewClassifiersConfigs): configString = "with SCM for linear with max_attributes : "+str(self.config[1])+", p : "+str(self.config[0])+\ " model_type : "+str(self.config[2])+" has chosen "+\ diff --git a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py index 6c4a31418c029f92a1054c362765365b11d5a9a7..9d1ec462239af1467ac4907a10fff5b2e29c764a 100644 --- a/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py +++ b/Code/MonoMutliViewClassifiers/Multiview/GetMultiviewDb.py @@ -13,23 +13,27 @@ __author__ = "Baptiste Bauvin" __status__ = "Prototype" # Production, Development, Prototype -def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=4, nbClass=2, datasetLength=100): - nbFeatures = 300 +def getPlausibleDBhdf5(features, pathF, name , NB_CLASS, LABELS_NAME, nbView=4, nbClass=2, datasetLength=500): + nbFeatures = 150 datasetFile = h5py.File(pathF+"Plausible.hdf5", "w") + CLASS_LABELS = np.array([0 for i in range(datasetLength/2)]+[1 for i in range(datasetLength/2)]) for viewIndex in range(nbView): - if viewIndex== 0 : - viewData = np.array([np.zeros(nbFeatures) for i in range(datasetLength/2)]+[np.ones(nbFeatures) for i in range(datasetLength/2)]).astype(np.uint8) - viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape) - viewDset.attrs["name"] = "View"+str(viewIndex) - viewDset.attrs["sparse"] = False - viewDset.attrs["binary"] = True - else: - viewData = np.array([np.random.normal(float((viewIndex+1)*10), 0.42, nbFeatures) for i in range(datasetLength/2)]+[np.random.normal(-float((viewIndex+1)*10),0.42,nbFeatures) for j in range(datasetLength/2)]) - viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape) - viewDset.attrs["name"] = "View"+str(viewIndex) - viewDset.attrs["sparse"] = False - viewDset.attrs["binary"] = False - CLASS_LABELS = np.array([0 for i in range(datasetLength/2+5)]+[1 for i in range(datasetLength/2-5)]) + # if viewIndex== 0 : + viewData = np.array([np.zeros(nbFeatures) for i in range(datasetLength/2)]+[np.ones(nbFeatures) for i in range(datasetLength/2)]).astype(np.uint8) + fakeTrueIndices = np.random.randint(0, datasetLength/2-1, datasetLength/5) + fakeFalseIndices = np.random.randint(datasetLength/2, datasetLength, datasetLength/5) + viewData[fakeTrueIndices] = np.ones((len(fakeTrueIndices), nbFeatures)) + viewData[fakeFalseIndices] = np.zeros((len(fakeFalseIndices), nbFeatures)) + viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape, data=viewData) + viewDset.attrs["name"] = "View"+str(viewIndex) + viewDset.attrs["sparse"] = False + viewDset.attrs["binary"] = True + # else: + # viewData = np.array([np.random.normal(float((viewIndex+1)*10), 0.42, nbFeatures) for i in range(datasetLength/2)]+[np.random.normal(-float((viewIndex+1)*10),0.42,nbFeatures) for j in range(datasetLength/2)]) + # viewDset = datasetFile.create_dataset("View"+str(viewIndex), viewData.shape) + # viewDset.attrs["name"] = "View"+str(viewIndex) + # viewDset.attrs["sparse"] = False + # viewDset.attrs["binary"] = False labelsDset = datasetFile.create_dataset("Labels", CLASS_LABELS.shape) labelsDset[...] = CLASS_LABELS labelsDset.attrs["name"] = "Labels"