diff --git a/README.md b/README.md
index 47dad1a51fbead53bb7f0b64a8c2ab1fbc156e6d..7782eb97a1026686cc3c92e3f14fcf402bfecac7 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,10 @@ With `top_directory` being the last directory in the `pathF` argument
 ##### If you already have an HDF5 dataset file it must be formatted as :
 One dataset for each view called `ViewX` with `X` being the view index, with 2 attributes :
 * `attrs["name"]` a string for the name of the view
-* `attrs["name"]` a boolean specifying whether the view is sparse or not
+* `attrs["sparse"]` a boolean specifying whether the view is sparse or not
+* `attrs["ranges"]` a `np.array` containing the range of each attribute in the view (for ex. : for a pixel the range will be 255, for a real attribute in [-1,1], the range will be 2).
+* `attrs["limits"]` a `np.array` containing the limits of each attribute in the view (for ex. : for a pixel the limits will be `[0, 255]`, for a real attribute in [-1,1], the limits will be `[-1,1]`).
+
 One dataset for the labels called `Labels` with one attribute :
 * `attrs["names"]` a list of strings encoded in utf-8 naming the labels in the right order
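For illustration, a minimal sketch of a file that follows this layout, written with `h5py`; the file name, view data and label names below are placeholders, not part of the platform:

```python
import h5py
import numpy as np

# Hypothetical view with two attributes: a pixel-like attribute in [0, 255]
# and a real attribute in [-1, 1]; all values are placeholders.
with h5py.File("my_dataset.hdf5", "w") as dataset_file:
    view = dataset_file.create_dataset("View0", data=np.zeros((10, 2)))
    view.attrs["name"] = "my_view"
    view.attrs["sparse"] = False
    view.attrs["ranges"] = np.array([255, 2])
    view.attrs["limits"] = np.array([[0, 255], [-1, 1]])

    labels = dataset_file.create_dataset("Labels", data=np.zeros(10, dtype=int))
    labels.attrs["names"] = [name.encode("utf-8") for name in ["yes", "no"]]
```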
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2a8198dce645d364064e6186c41c19e02af88e3c..a62e70c96876ab175addf6f34665fcb6fb6017d3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from recommonmark.parser import CommonMarkParser
 from recommonmark.transform import AutoStructify
+# import os, sys
 #
 # MultiviewPlatform documentation build configuration file, created by
 # sphinx-quickstart on Mon Jan 29 17:13:09 2018.
@@ -31,6 +32,8 @@ from recommonmark.transform import AutoStructify

 add_module_names = False

+# sys.path.append(os.path.abspath('sphinxext'))
+
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
@@ -44,7 +47,9 @@ extensions = ['sphinx.ext.autodoc',
               'sphinx.ext.ifconfig',
               'sphinx.ext.viewcode',
               'sphinx.ext.githubpages',
-              'sphinx.ext.napoleon']
+              'sphinx.ext.napoleon',
+              'recommonmark']
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -52,19 +57,20 @@ templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-source_suffix = ['.rst', '.md']
+source_suffix = {'.rst': 'restructuredtext', '.md':'markdown'}
 # source_suffix = '.rst'
+# source_suffix = ['.rst', '.md']

-source_parsers = {
-    '.md': CommonMarkParser,
-}
+# source_parsers = {
+#     '.md': CommonMarkParser,
+# }

 # The master toctree document.
 master_doc = 'index'

 # General information about the project.
 project = u'MultiviewPlatform'
-copyright = u'2018, Baptiste BAUVIN'
+copyright = u'2019, Baptiste BAUVIN'
 author = u'Baptiste BAUVIN'

 # The version info for the project you're documenting, acts as replacement for
@@ -176,9 +182,8 @@ texinfo_documents = [

 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {'https://docs.python.org/': None}
-def setup(app):
-    app.add_config_value('recommonmark_config', {
-        'url_resolver': lambda url: github_doc_root + url,
-        'auto_toc_tree_section': 'Contents',
-    }, True)
-    app.add_transform(AutoStructify)
\ No newline at end of file
+# def setup(app):
+#     app.add_config_value('recommonmark_config', {
+#         'auto_toc_tree_section': 'Contents',
+#     }, True)
+#     app.add_transform(AutoStructify)
\ No newline at end of file
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 8ba7870d48f32245e8d2376bb92d3671a5ff039a..33481978594be226a3dbb05193c7c4bbe54c8d75 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -1,3 +1,7 @@
 Read me
 =======
-.. include:: ../../README.md
\ No newline at end of file
+
+.. toctree::
+   :maxdepth: 1
+
+   ../../README.md
\ No newline at end of file
diff --git a/docs/source/sphinxext/recommon.py b/docs/source/sphinxext/recommon.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b1cb8c84239dd57a9e36625b57f110de44ae37f
--- /dev/null
+++ b/docs/source/sphinxext/recommon.py
@@ -0,0 +1,4 @@
+from recommonmark.transform import AutoStructify
+
+def setup(app):
+    app.add_transform(AutoStructify)
\ No newline at end of file
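The `setup()` hook removed from `conf.py` above survives as the standalone `sphinxext/recommon.py` extension. Judging by the commented-out `sys.path.append` line, loading it would presumably look like the following sketch; this wiring is an assumption, not part of the diff:

```python
# docs/source/conf.py (hypothetical): make sphinxext/ importable and load
# the 'recommon' module so its setup(app) registers AutoStructify.
import os, sys
sys.path.append(os.path.abspath('sphinxext'))

extensions = [
    # ... the sphinx.ext.* extensions listed above ...
    'recommonmark',
    'recommon',
]
```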
diff --git a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
index bf6e0b12f176cd177ade68d4b8925ebdecd43f82..f445fbe33f60e5bff9a2a480487f1c2ae042b329 100644
--- a/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
+++ b/multiview_platform/MonoMultiViewClassifiers/ExecClassif.py
@@ -460,7 +460,7 @@ def execClassif(arguments):
     getDatabase = execution.getDatabaseFunction(args.name,args.type)

     DATASET, LABELS_DICTIONARY = getDatabase(args.views, args.pathF, args.name,
                                              args.CL_nbClass,
-                                             args.CL_classes, randomState, args.full)
+                                             args.CL_classes, randomState, args.full, args.add_noise, args.noise_std)
     splits = execution.genSplits(DATASET.get("Labels").value, args.CL_split,
                                  statsIterRandomStates)
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
index 503bae21b04262b51e873e6d3a09b0d3029c9dcb..43c92866e0ca5a4f548c5ba4c6359710a5c642a4 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/BoostUtils.py
@@ -669,12 +669,13 @@ class ConvexProgram(object):
         return signs


-def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None):
+def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accuracies", bounds=None, bound_name=None, boosting_bound=None, set="train"):
     if type(name) is not str:
         name = " ".join(name.getConfig().strip().split(" ")[:2])
     if bounds:
         f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_title(name+" during train for "+classifier_name)
+        ax.set_ylim(bottom=0.0,top=1.0)
+        ax.set_title(name+" during "+set+" for "+classifier_name)
         x = np.arange(len(train_accuracies))
         scat = ax.scatter(x, np.array(train_accuracies), marker=".")
         if boosting_bound:
@@ -690,7 +691,8 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
         plt.close()
     else:
         f, ax = plt.subplots(nrows=1, ncols=1)
-        ax.set_title(name+" during train for "+classifier_name)
+        ax.set_ylim(bottom=0.0, top=1.0)
+        ax.set_title(name + " during "+set+" for " + classifier_name)
         x = np.arange(len(train_accuracies))
         scat = ax.scatter(x, np.array(train_accuracies), marker=".", )
         ax.legend((scat,), (name,))
@@ -702,7 +704,7 @@ def get_accuracy_graph(train_accuracies, classifier_name, file_name, name="Accur
 class BaseBoost(object):

     def __init__(self):
-        self.n_stumps = 1
+        self.n_stumps = 10

     def _collect_probas(self, X):
         return np.asarray([clf.predict_proba(X) for clf in self.estimators_generator.estimators_])
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
index 2ca5956319bd19333d242c23aa702307cf6feec3..e0ea8568b7b911c6e9ab8e79f8705fe4173c2173 100644
--- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
@@ -54,6 +54,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.train_time = 0
         self.train_shape = None
         self.step_decisions = None
+        self.step_prod = None
         self.n_max_iterations = n_max_iterations
         self.estimators_generator = estimators_generator
         self.self_complemented = self_complemented
@@ -73,13 +74,13 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                       "n_stumps", "use_r", "c_bound_sol"]

     def set_params(self, **params):
-        self.self_complemented = params["self_complemented"]
-        self.twice_the_same = params["twice_the_same"]
-        self.c_bound_choice = params["c_bound_choice"]
-        self.random_start = params["random_start"]
+        # self.self_complemented = params["self_complemented"]
+        # self.twice_the_same = params["twice_the_same"]
+        # self.c_bound_choice = params["c_bound_choice"]
+        # self.random_start = params["random_start"]
         self.n_max_iterations = params["n_max_iterations"]
-        self.n_stumps = params["n_stumps_per_attribute"]
-        self.use_r = params["use_r"]
+        # self.n_stumps = params["n_stumps_per_attribute"]
+        # self.use_r = params["use_r"]

     def fit(self, X, y):
@@ -96,20 +97,23 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.n_total_examples = m
         self.init_boosting(m, formatted_y, y_kernel_matrix)
+        self.break_cause = " the maximum number of iterations was attained."

         for k in range(min(n - 1, self.n_max_iterations - 1 if self.n_max_iterations is not None else np.inf)):
+            # Print dynamically the step and the error of the current classifier
             self.it = k
-            print(
-                "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
-                                                              self.bounds[-1] > self.train_metrics[-1],
-                                                              k + 2,
-                                                              self.n_max_iterations,
-                                                              self.voter_perfs[-1]),
-                end="\r")
+
+            # print(
+            #     "Resp. bound : {}, {}; {}/{}, eps :{}".format(self.respected_bound,
+            #                                                   self.bounds[-1] > self.train_metrics[-1],
+            #                                                   k + 2,
+            #                                                   self.n_max_iterations,
+            #                                                   self.voter_perfs[-1]),
+            #     end="\r")

             sol, new_voter_index = self.choose_new_voter(y_kernel_matrix, formatted_y)

@@ -125,8 +129,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
             self.update_example_weights(formatted_y)
+
             self.update_info_containers(formatted_y, voter_perf, k)

+
         self.nb_opposed_voters = self.check_opposed_voters()
         self.estimators_generator.estimators_ = \
             self.estimators_generator.estimators_[self.chosen_columns_]
@@ -155,7 +161,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         classification_matrix = self._binary_classification_matrix(X)
         self.step_predict(classification_matrix)
         margins = np.sum(classification_matrix * self.weights_, axis=1)
-        # print(margins)
         signs_array = np.array([int(x) for x in sign(margins)])
         signs_array[signs_array == -1] = 0
         end = time.time()
@@ -165,24 +170,18 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def step_predict(self, classification_matrix):
         if classification_matrix.shape != self.train_shape:
             self.step_decisions = np.zeros(classification_matrix.shape)
+            self.step_prod = np.zeros(classification_matrix.shape)
             for weight_index in range(self.weights_.shape[0]-1):
                 margins = np.sum(classification_matrix[:, :weight_index+1]*
                                  self.weights_[:weight_index+1], axis=1)
-                # print(margins)
                 signs_array = np.array([int(x) for x in sign(margins)])
                 signs_array[signs_array == -1] = 0
                 self.step_decisions[:, weight_index] = signs_array
+                self.step_prod[:, weight_index] = np.sum(classification_matrix[:, :weight_index+1]*
+                                                         self.weights_[:weight_index+1], axis=1)

     def update_info_containers(self, y, voter_perf, k):
         """Is used at each iteration to compute and store all the needed quantities for later analysis"""
         self.example_weights_.append(self.example_weights)
-        m = self.new_voter.shape[0]
-        t = np.sum(self.previous_vote * self.new_voter)/m
-        print(np.linalg.norm(self.previous_vote)>1)
-        # if abs((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m)))<=1:
-        #     print((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m)))
-        #     print((g_g*f2*(2*g_f+self.q*g_g))/(g_f**2*(2*d_fg+self.q * m))>=1)
         self.previous_vote += self.q * self.new_voter
-        self.previous_votes.append(self.previous_vote)
         self.previous_margins.append(
             np.multiply(y, self.previous_vote))
@@ -226,6 +225,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
     def append_new_voter(self, new_voter_index):
         """Used to append the voter to the majority vote"""
         self.chosen_columns_.append(new_voter_index)
+        # print((self.classification_matrix[:, new_voter_index] == self.chosen_one).all())
         self.new_voter = self.classification_matrix[:, new_voter_index].reshape(
             (self.n_total_examples, 1))
@@ -244,6 +244,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.example_weights = self._initialize_alphas(m).reshape((m, 1))
         self.example_weights_.append(self.example_weights)
+
         if self.random_start:
             first_voter_index = self.random_state.choice(
                 np.where(np.sum(y_kernel_matrix, axis=0)>0)[0])
@@ -252,8 +253,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
                 y_kernel_matrix)

         self.chosen_columns_.append(first_voter_index)
-        self.new_voter = self.classification_matrix[:,
-                         first_voter_index].reshape((m, 1))
+        self.new_voter = np.array(self.classification_matrix[:,
+                         first_voter_index].reshape((m, 1)), copy=True)

         self.previous_vote = self.new_voter
@@ -276,6 +277,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.update_example_weights(y)
         self.example_weights_.append(self.example_weights)
+
         self.previous_margins.append(
             np.multiply(y, self.previous_vote))
         self.selected_margins.append(np.sum(np.multiply(y, self.previous_vote)))
@@ -293,6 +295,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.bounds.append(bound)

+
+
     def format_X_y(self, X, y):
         """Formats the data : X -the examples- and y -the labels- to be used properly by the algorithm """
         if scipy.sparse.issparse(X):
@@ -315,6 +319,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         m, n = self.classification_matrix.shape
         y_kernel_matrix = np.multiply(y, self.classification_matrix)
+
         return m, n, y_kernel_matrix

     def init_info_containers(self):
@@ -374,7 +379,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         and select the one that has the smallest minimum"""
         m = y_kernel_matrix.shape[0]
         weighted_previous_sum = np.multiply(y,
-                                            self.previous_vote.reshape((m, 1)))
+                                            self.previous_vote.reshape(m,1))
         margin_old = np.sum(weighted_previous_sum)
         if self.c_bound_sol:
             weighted_hypothesis = y_kernel_matrix
@@ -384,7 +389,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         bad_margins = np.where(np.sum(weighted_hypothesis, axis=0)<=0.0)[0]

         self.B2 = m
-        self.B1s = np.sum(2 * (weighted_previous_sum * weighted_hypothesis),
+        self.B1s = np.sum(2 * np.multiply(weighted_previous_sum, weighted_hypothesis),
                           axis=0)
         self.B0 = np.sum(weighted_previous_sum ** 2)
@@ -406,6 +411,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         self.margins.append(math.sqrt(self.A2s[best_hyp_index]/m))
         self.disagreements.append(0.5*self.B1s[best_hyp_index]/m)
+
         return sols[best_hyp_index], best_hyp_index

     def make_masked_c_bounds(self, sols, bad_margins):
@@ -453,8 +459,20 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
         for step_index in range(self.step_decisions.shape[1]-1):
             step_metrics.append(self.plotted_metric.score(y_test, self.step_decisions[:, step_index]))
         step_metrics = np.array(step_metrics)
+        np.savetxt(directory + "step_test_metrics.csv", step_metrics, delimiter=',')
         get_accuracy_graph(step_metrics, self.__class__.__name__,
-                           directory + 'step_test_metrics.png', self.plotted_metric)
+                           directory + 'step_test_metrics.png', self.plotted_metric, set="test")
+        step_cbounds = []
+        for step_index in range(self.step_prod.shape[1]):
+            num = np.sum(y_test*self.step_prod[:, step_index])**2
+            den = np.sum((self.step_prod[:, step_index])**2)
+            step_cbounds.append(1-num/(den*self.step_prod.shape[0]))
+        step_cbounds = np.array(step_cbounds)
+        np.savetxt(directory + "step_test_c_bounds.csv", step_cbounds,
+                   delimiter=',')
+        get_accuracy_graph(step_cbounds, self.__class__.__name__,
+                           directory + 'step_test_c_bounds.png',
+                           "C_bound", set="test")

     def getInterpretQar(self, directory, y_test=None):
         self.directory = directory
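The new `step_prod` attribute keeps the cumulative weighted vote after each boosting step, which is what lets `getInterpretQar` above evaluate the empirical C-bound on the test set at every iteration. A compact restatement of the quantity computed in the `step_cbounds` loop, assuming labels in {-1, +1} (the encoding of `y_test` in the platform may differ):

```python
import numpy as np

def empirical_c_bound(vote, y):
    # 1 - (sum(y * f))^2 / (m * sum(f^2)), with f the cumulative weighted
    # vote over the first k voters, as in the step_cbounds loop above.
    m = vote.shape[0]
    return 1 - np.sum(y * vote) ** 2 / (np.sum(vote ** 2) * m)

# Toy check: a vote perfectly aligned with the labels reaches the minimum, 0.
y = np.array([1., -1., 1., -1.])
print(empirical_c_bound(0.5 * y, y))  # 0.0
```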
diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
index d9ecbf28d18d6d3aa99a1705eaf53d2cb3bcf3c3..b2d7dcad8e35363b85413683d4c92e32a9ec8dcc 100644
--- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
+++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py
@@ -1,23 +1,24 @@
-from ..Monoview.MonoviewUtils import BaseMonoviewClassifier
+from ..Monoview.MonoviewUtils import BaseMonoviewClassifier, CustomRandint
 from ..Monoview.Additions.BoostUtils import getInterpretBase
 from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar


 class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):

-    def __init__(self, random_state=None, **kwargs):
-        super(CGreed, self).__init__(n_max_iterations=500,
+    def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs):
+        super(CGreed, self).__init__(n_max_iterations=n_max_iterations,
                                      random_state=random_state,
                                      self_complemented=True,
                                      twice_the_same=True,
                                      c_bound_choice=True,
                                      random_start=False,
-                                     n_stumps_per_attribute=10,
+                                     n_stumps_per_attribute=n_stumps_per_attribute,
                                      use_r=True,
                                      c_bound_sol=True
                                      )
-        self.param_names = []
-        self.distribs = []
+
+        self.param_names = ["n_max_iterations"]
+        self.distribs = [CustomRandint(low=1, high=500)]
         self.classed_params = []
         self.weird_strings = {}
@@ -34,7 +35,8 @@ class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier):

 def formatCmdArgs(args):
     """Used to format kwargs for the parsed args"""
-    kwargsDict = {}
+    kwargsDict = {"n_stumps_per_attribute":args.CGR_stumps,
+                  "n_max_iterations":args.CGR_n_iter}
     return kwargsDict
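With this change, `n_max_iterations` and `n_stumps_per_attribute` become real constructor arguments (filled from the new `--CGR_n_iter` and `--CGR_stumps` flags, added in `utils/execution.py` below, through `formatCmdArgs`), and `n_max_iterations` is exposed to hyper-parameter search via `param_names`/`distribs`. A hypothetical direct instantiation, with illustrative values:

```python
# Defaults are n_max_iterations=500 and n_stumps_per_attribute=10.
clf = CGreed(random_state=42, n_max_iterations=100, n_stumps_per_attribute=5)
```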
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index 9a9f486fb5eec6598d7f5b9d1b851d0cefbaecb6..0086ae5cf4babc132ca10a0fcae294781bef1897 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -23,6 +23,8 @@ def copyHDF5(pathF, name, nbCores):
         newDataSet.close()


+
+
 def datasetsAlreadyExist(pathF, name, nbCores):
     """Used to check if it's necessary to copy datasets"""
     allDatasetExist = True
@@ -52,7 +54,7 @@ def makeMeNoisy(viewData, randomState, percentage=15):
     return noisyViewData


-def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, nbView=3,
+def getPlausibleDBhdf5(features, pathF, name, NB_CLASS=3, LABELS_NAME="", randomState=None, full=True, add_noise=False, noise_std=0.15, nbView=3,
                        nbClass=2, datasetLength=34, randomStateInt=None):
     """Used to generate a plausible dataset to test the algorithms"""
     randomStateInt = 42
@@ -289,38 +291,74 @@ def copyhdf5Dataset(sourceDataFile, destinationDataFile, sourceDatasetName, dest
         newDset.attrs[key] = value


-def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False):
+def getClassicDBhdf5(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15):
     """Used to load a hdf5 database"""
     if full:
         datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        dataset_name = nameDB
         labelsDictionary = dict((labelIndex, labelName.decode("utf-8"))
                                 for labelIndex, labelName in
                                 enumerate(datasetFile.get("Labels").attrs["names"]))
-        return datasetFile, labelsDictionary
     else:
         askedLabelsNames = [askedLabelName.encode("utf8") for askedLabelName in askedLabelsNames]
-        datasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
-        fullLabels = datasetFile.get("Labels").value
-        temp_dataset = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
-        datasetFile.copy("Metadata", temp_dataset)
+        baseDatasetFile = h5py.File(pathF + nameDB + ".hdf5", "r")
+        fullLabels = baseDatasetFile.get("Labels").value
+        datasetFile = h5py.File(pathF+nameDB+"_temp_view_label_select.hdf5", "w")
+        dataset_name = nameDB+"_temp_view_label_select"
+        baseDatasetFile.copy("Metadata", datasetFile)
         labelsSet = getClasses(fullLabels)
-        availableLabelsNames = list(datasetFile.get("Labels").attrs["names"])
+        availableLabelsNames = list(baseDatasetFile.get("Labels").attrs["names"])
         askedLabelsNames, askedLabelsNamesSet = fillLabelNames(NB_CLASS, askedLabelsNames,
                                                                randomState, availableLabelsNames)

         newLabels, newLabelsNames, usedIndices = filterLabels(labelsSet,
                                                               askedLabelsNamesSet,
                                                               fullLabels,
                                                               availableLabelsNames,
                                                               askedLabelsNames)
-        temp_dataset.get("Metadata").attrs["datasetLength"] = len(usedIndices)
-        temp_dataset.get("Metadata").attrs["nbClass"] = NB_CLASS
-        temp_dataset.create_dataset("Labels", data=newLabels)
-        temp_dataset.get("Labels").attrs["names"] = newLabelsNames
-        filterViews(datasetFile, temp_dataset, views, usedIndices)
+        datasetFile.get("Metadata").attrs["datasetLength"] = len(usedIndices)
+        datasetFile.get("Metadata").attrs["nbClass"] = NB_CLASS
+        datasetFile.create_dataset("Labels", data=newLabels)
+        datasetFile.get("Labels").attrs["names"] = newLabelsNames
+        filterViews(baseDatasetFile, datasetFile, views, usedIndices)

         labelsDictionary = dict((labelIndex, labelName.decode("utf-8"))
                                 for labelIndex, labelName in
-                                enumerate(temp_dataset.get("Labels").attrs["names"]))
-        return temp_dataset, labelsDictionary
+                                enumerate(datasetFile.get("Labels").attrs["names"]))
+
+    if add_noise:
+        datasetFile = add_gaussian_noise(datasetFile, randomState, pathF, dataset_name, noise_std)
+    else:
+        pass
+    return datasetFile, labelsDictionary
+
+
+def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, noise_std=0.15):
+    """In this function, we add a gaussian noise centered in 0 with specified
+    std to each view, according to its range (the noise will be
+    multiplied by this range) and we crop the noisy signal according to the
+    view's attributes limits.
+    This is done by creating a new dataset, to keep clean data."""
+    noisy_dataset = h5py.File(path_f+dataset_name+"_noised.hdf5", "w")
+    dataset_file.copy("Metadata", noisy_dataset)
+    dataset_file.copy("Labels", noisy_dataset)
+    for view_index in range(dataset_file.get("Metadata").attrs["nbView"]):
+        dataset_file.copy("View"+str(view_index), noisy_dataset)
+    # dataset_file.close()
+    for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]):
+        view_name = "View" + str(view_index)
+        view_dset = noisy_dataset.get(view_name)
+        orig_shape = view_dset.value.shape
+        view_ranges = view_dset.attrs["ranges"]
+        view_limits = view_dset.attrs["limits"]
+        normal_dist = random_state.normal(0, noise_std, view_dset.value.shape)
+        noise = normal_dist*view_ranges
+        noised_data = view_dset.value+noise
+        noised_data = np.where(noised_data<view_limits[:,0], view_limits[:,0], noised_data)
+        noised_data = np.where(noised_data>view_limits[:,1], view_limits[:,1], noised_data)
+        noisy_dataset[view_name][...] = noised_data
+        final_shape = noised_data.shape
+    return noisy_dataset
+
+
+
-def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, delimiter=","):
+def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, randomState, full=False, add_noise=False, noise_std=0.15, delimiter=","):
     # TODO : Update this one
     labelsNames = np.genfromtxt(pathF + nameDB + "-labels-names.csv", dtype='str', delimiter=delimiter)
     datasetFile = h5py.File(pathF + nameDB + ".hdf5", "w")
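The noising scheme described in `add_gaussian_noise`'s docstring above, in isolation: per-attribute Gaussian noise scaled by `attrs["ranges"]`, then cropped to `attrs["limits"]`. A self-contained numpy sketch with toy values (one pixel-like attribute, one real attribute in [-1, 1]); names and data are illustrative:

```python
import numpy as np

rng = np.random.RandomState(42)
data = np.array([[128., 0.5],
                 [64., -0.8]])              # 2 examples x 2 attributes
ranges = np.array([255., 2.])               # attrs["ranges"]
limits = np.array([[0., 255.],
                   [-1., 1.]])              # attrs["limits"], one row per attribute

noise = rng.normal(0, 0.15, data.shape) * ranges  # std scaled by each attribute's range
noised = data + noise
noised = np.where(noised < limits[:, 0], limits[:, 0], noised)  # crop below lower limit
noised = np.where(noised > limits[:, 1], limits[:, 1], noised)  # crop above upper limit
```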
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index c0e0f072d2404244a46320b609d1613861963814..1f0b1101673098624e35d478bba47adfe617bd07 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -43,6 +43,11 @@ def parseTheArgs(arguments):
     groupStandard.add_argument('-full', action='store_true', help='Use option to use full dataset and no labels or view filtering')
     groupStandard.add_argument('-debug', action='store_true',
                                help='Use option to debug implemented algorithms')
+    groupStandard.add_argument('-add_noise', action='store_true',
+                               help='Use option to add noise to the data')
+    groupStandard.add_argument('--noise_std', metavar='FLOAT', action='store',
+                               help='The std of the gaussian noise that will be added to the data.',
+                               type=float, default=0.15)

     groupClass = parser.add_argument_group('Classification arguments')
@@ -172,11 +177,11 @@ def parseTheArgs(arguments):
     groupQarBoost.add_argument('--QarB_epsilon', metavar='FLOAT', type=float, action='store',
                                help='Set the epsilon parameter for QarBoost', default=1e-08)

-    groupQarBoostv2 = parser.add_argument_group('QarBoostv2 arguments')
-    groupQarBoostv2.add_argument('--QarB2_mu', metavar='FLOAT', type=float, action='store',
-                                 help='Set the mu parameter for QarBoostv2', default=0.001)
-    groupQarBoostv2.add_argument('--QarB2_epsilon', metavar='FLOAT', type=float, action='store',
-                                 help='Set the epsilon parameter for QarBoostv2', default=1e-08)
+    groupCGreed = parser.add_argument_group('CGreed arguments')
+    groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int, action='store',
+                             help='Set the n_stumps_per_attribute parameter for CGreed', default=1)
+    groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int, action='store',
+                             help='Set the n_max_iterations parameter for CGreed', default=100)

     groupQarBoostv3 = parser.add_argument_group('QarBoostv3 arguments')
     groupQarBoostv3.add_argument('--QarB3_mu', metavar='FLOAT', type=float, action='store',