From d7b1104d1d067562066881972d54093364d8a933 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Fri, 29 Nov 2019 10:33:27 -0500 Subject: [PATCH] Examples working in the python console --- .gitignore | 2 +- MANIFEST.in | 3 +- README.md | 45 ++-- multiview_platform/examples/__init__.py | 0 .../config_files/config_example_1.yml | 239 ++++++++++++++++++ multiview_platform/execute.py | 8 +- .../exec_classif.py | 22 +- .../monoview/analyze_result.py | 8 +- .../monoview/exec_classif_mono_view.py | 4 +- .../monoview_classifiers/svm_linear.py | 3 +- .../multiview/exec_multiview.py | 12 +- .../multiview/multiview_utils.py | 2 +- .../result_analysis.py | 4 +- .../utils/execution.py | 16 +- .../utils/get_multiview_db.py | 8 +- .../test_ExecClassifMonoView.py | 2 +- multiview_platform/versions.py | 158 ++++++------ requirements.txt | 2 +- setup.py | 12 +- 19 files changed, 398 insertions(+), 152 deletions(-) create mode 100644 multiview_platform/examples/__init__.py create mode 100644 multiview_platform/examples/config_files/config_example_1.yml diff --git a/.gitignore b/.gitignore index 4d1a1ab0..a071ee73 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,4 @@ dist* docs/build* multiview_platform/.idea/* .gitignore - +multiview_platform/examples/results* diff --git a/MANIFEST.in b/MANIFEST.in index 91f371be..793057f9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -include *.md \ No newline at end of file +include *.md +include multiview_platform/examples* \ No newline at end of file diff --git a/README.md b/README.md index 55028c1a..59714299 100644 --- a/README.md +++ b/README.md @@ -10,47 +10,44 @@ This project aims to be an easy-to-use solution to run a prior benchmark on a da To be able to use this project, you'll need : -* [Python 2.7](https://docs.python.org/2/) or [Python 3](https://docs.python.org/3/) +* [Python 3.6](https://docs.python.org/3/) And the following python modules : -* [pyscm](https://github.com/aldro61/pyscm) - Set Covering Machine, Marchand, M., & Taylor, J. S. 
(2003) by A.Drouin, F.Brochu, G.Letarte St-Pierre, M.Osseni, P-L.Plante
-* [numpy](http://www.numpy.org/), [scipy](https://scipy.org/)
-* [matplotlib](http://matplotlib.org/) - Used to plot results
-* [sklearn](http://scikit-learn.org/stable/) - Used for the monoview classifiers
-* [joblib](https://pypi.python.org/pypi/joblib) - Used to compute on multiple threads
-* [h5py](https://www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM
-* [pickle](https://docs.python.org/3/library/pickle.html) - Used to store some results
-* ([graphviz](https://pypi.python.org/pypi/graphviz) - Used for decision tree interpretation)
-* [pandas](https://pandas.pydata.org/) -
+
+* [numpy](http://www.numpy.org/), [scipy](https://scipy.org/),
+* [matplotlib](http://matplotlib.org/) - Used to plot results,
+* [sklearn](http://scikit-learn.org/stable/) - Used for the monoview classifiers,
+* [joblib](https://pypi.python.org/pypi/joblib) - Used to compute on multiple threads,
+* [h5py](https://www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM,
+* [pickle](https://docs.python.org/3/library/pickle.html) - Used to store some results,
+* [pandas](https://pandas.pydata.org/) - Used to manipulate data efficiently,
+* [six](https://pypi.org/project/six/) - Python 2/3 compatibility helpers,
+* [m2r](https://pypi.org/project/m2r/) - Used to generate documentation from the readme,
+* [docutils](https://pypi.org/project/docutils/) - Used to generate documentation,
+* [pyyaml](https://pypi.org/project/PyYAML/) - Used to read the config files,
+* [plotly](https://plot.ly/) - Used to generate interactive HTML visuals.

They are all tested in `multiview-machine-mearning-omis/multiview_platform/MonoMutliViewClassifiers/Versions.py` which is automatically checked each time you run the `execute` script

### Installing

-cd in the project directory
-and install the project
+Once you have cloned the project from this repository, just run:

```
-cd multiview-machine-learning-omis
pip install -e .
```
+in the `multiview-machine-learning-omis` directory.

### Running on simulated data

In order to run it you'll need to try on **simulated** data with the command
+```python
+from multiview_platform.execute import execute
+execute()
```
-cd multiview-machine-learning-omis/multiview_platform
-python execute.py -log
-```
-Results will be stored in `multiview-machine-learning-omis/multiview_platform/mono_multi_view_classifiers/results/`
-
-If you want to run a multiclass (one versus one) benchmark on simulated data, use :
-```
-cd multiview-machine-learning-omis/multiview_platform
-python execute.py -log --CL_nbClass 3
-```
+Results will be stored in the results directory of the installation path: `path/to/install/multiview-machine-learning-omis/results`.
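+The `execute` function also accepts an explicit configuration file, which is handy for running a custom benchmark from the Python console. A minimal sketch (the path below is purely illustrative, any YAML file with the same structure as `config_example_1.yml` can be used):
+```python
+from multiview_platform.execute import execute
+
+# Run the benchmark with a user-supplied configuration file
+# (hypothetical path, adapt it to your own setup).
+execute(config_path="path/to/my_config.yml")
+```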
-If no path is specified, simulated hdf5 datasets are stored in `multiview-machine-learning-omis/data`
+Simulated HDF5 datasets are stored in `path/to/install/multiview-machine-learning-omis/data`.

### Discovering the arguments

diff --git a/multiview_platform/examples/__init__.py b/multiview_platform/examples/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/multiview_platform/examples/config_files/config_example_1.yml b/multiview_platform/examples/config_files/config_example_1.yml
new file mode 100644
index 00000000..4604d09a
--- /dev/null
+++ b/multiview_platform/examples/config_files/config_example_1.yml
@@ -0,0 +1,239 @@
+# The base configuration of the benchmark
+Base:
+  # Enable logging
+  log: True
+  # The name of each dataset in the directory on which the benchmark should be run
+  name: ["plausible"]
+  # A label for the result directory
+  label: "_"
+  # The type of dataset, currently supported: ".hdf5" and ".csv"
+  type: ".hdf5"
+  # The views to use in the benchmark, an empty value will result in using all the views
+  views:
+  # The path to the directory where the datasets are stored
+  pathf: "examples/data/example_1/"
+  # The niceness of the processes, useful to lower their priority
+  nice: 0
+  # The random state of the benchmark, useful for reproducibility
+  random_state: 42
+  # The number of parallel computing threads
+  nb_cores: 1
+  # Used to run the benchmark on the full dataset
+  full: False
+  # Used to be able to run more than one benchmark per minute
+  debug: False
+  # To add noise to the data, will add Gaussian noise with standard deviation noise_std
+  add_noise: False
+  noise_std: 0.0
+  # The directory in which the results will be stored
+  res_dir: "examples/results/example_1/"
+
+# All the classification-related configuration options
+Classification:
+  # If the dataset is multiclass, will use this multiclass-to-biclass method
+  multiclass_method: "oneVersusOne"
+  # The ratio of the number of test examples to the number of train examples
+  split: 0.8
+  # The number of folds in the cross-validation process when hyper-parameter optimization is performed
+  nb_folds: 2
+  # The number of classes to select in the dataset
+  nb_class: 2
+  # The name of the classes to select in the dataset
+  classes:
+  # The type of algorithms to run during the benchmark (monoview and/or multiview)
+  type: ["monoview","multiview"]
+  # The name of the monoview algorithms to run, ["all"] to run all the available classifiers
+  algos_monoview: ["all"]
+  # The names of the multiview algorithms to run, ["all"] to run all the available classifiers
+  algos_multiview: ["all"]
+  # The number of times the benchmark is repeated with different train/test
+  # splits, to have more statistically significant results
+  stats_iter: 1
+  # The metrics that will be used in the result analysis
+  metrics: ["accuracy_score", "f1_score"]
+  # The metric that will be used in the hyper-parameter optimization process
+  metric_princ: "f1_score"
+  # The type of hyper-parameter optimization method
+  hps_type: "randomized_search"
+  # The number of iterations in the hyper-parameter optimization process
+  hps_iter: 2
+
+
+# The following arguments are classifier-specific, and are documented in each
+# of the corresponding modules.
+
+# In order to run multiple sets of parameters, use multiple values in the
+# following lists, and set hps_type to None.
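+# For example (illustrative values, kept commented out so they do not override
+# the active decision_tree section below), providing several values per list:
+#
+# decision_tree:
+#   max_depth: [1, 5, 10]
+#   criterion: ["gini", "entropy"]
+#   splitter: ["best"]
+#
+# runs the benchmark over the listed values instead of the randomized search,
+# as long as hps_type is set to None.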
+ +##################################### +# The Monoview Classifier arguments # +##################################### + + +random_forest: + n_estimators: [25] + max_depth: [3] + criterion: ["entropy"] + +svm_linear: + C: [1] + +svm_rbf: + C: [1] + +svm_poly: + C: [1] + degree: [2] + +adaboost: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + +adaboost_pregen: + n_estimators: [50] + base_estimator: ["DecisionTreeClassifier"] + n_stumps: [1] + +adaboost_graalpy: + n_iterations: [50] + n_stumps: [1] + +decision_tree: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + +decision_tree_pregen: + max_depth: [10] + criterion: ["gini"] + splitter: ["best"] + n_stumps: [1] + +sgd: + loss: ["hinge"] + penalty: [l2] + alpha: [0.0001] + +knn: + n_neighbors: [5] + weights: ["uniform"] + algorithm: ["auto"] + +scm: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + +scm_pregen: + model_type: ["conjunction"] + max_rules: [10] + p: [0.1] + n_stumps: [1] + +cq_boost: + mu: [0.01] + epsilon: [1e-06] + n_max_iterations: [5] + n_stumps: [1] + +cg_desc: + n_max_iterations: [10] + n_stumps: [1] + +cb_boost: + n_max_iterations: [10] + n_stumps: [1] + +lasso: + alpha: [1] + max_iter: [2] + +gradient_boosting: + n_estimators: [2] + + +###################################### +# The Multiview Classifier arguments # +###################################### + +weighted_linear_early_fusion: + view_weights: [null] + monoview_classifier_name: ["decision_tree"] + monoview_classifier_config: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +entropy_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +disagree_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + + +double_fault_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +difficulty_fusion: + classifier_names: [["decision_tree"]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +scm_late_fusion: + classifier_names: [["decision_tree"]] + p: 0.1 + max_rules: 10 + model_type: 'conjunction' + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +majority_voting_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +bayesian_inference_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +weighted_linear_late_fusion: + classifier_names: [["decision_tree", "decision_tree", "decision_tree", ]] + classifier_configs: + decision_tree: + max_depth: [1] + criterion: ["gini"] + splitter: ["best"] + +mumbo: + base_estimator: [null] + n_estimators: [10] + best_view_mode: ["edge"] \ No newline at end of file diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py index c4afa957..a23c64f2 100644 --- a/multiview_platform/execute.py +++ b/multiview_platform/execute.py @@ -1,13 +1,17 @@ """This is the execution module, used to execute the code""" +import os -def execute(): +def execute(config_path=os.path.join(os.path.dirname(os.path.realpath(__file__)), 
"examples", "config_files", "config_example_1.yml")): from multiview_platform import versions as vs vs.test_versions() import sys from multiview_platform.mono_multi_view_classifiers import exec_classif - exec_classif.exec_classif(sys.argv[1:]) + if sys.argv[1:]: + exec_classif.exec_classif(sys.argv[1:]) + else: + exec_classif.exec_classif(["--config_path", config_path]) if __name__ == "__main__": diff --git a/multiview_platform/mono_multi_view_classifiers/exec_classif.py b/multiview_platform/mono_multi_view_classifiers/exec_classif.py index e5a66352..3eb92a7f 100644 --- a/multiview_platform/mono_multi_view_classifiers/exec_classif.py +++ b/multiview_platform/mono_multi_view_classifiers/exec_classif.py @@ -62,8 +62,7 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): if "monoview" in cl_type: if monoview_algos == ['all']: benchmark["monoview"] = [name for _, name, isPackage in - pkgutil.iter_modules([ - "./mono_multi_view_classifiers/monoview_classifiers"]) + pkgutil.iter_modules(monoview_classifiers.__path__) if not isPackage] else: @@ -72,8 +71,7 @@ def init_benchmark(cl_type, monoview_algos, multiview_algos, args): if "multiview" in cl_type: if multiview_algos==["all"]: benchmark["multiview"] = [name for _, name, isPackage in - pkgutil.iter_modules([ - "./mono_multi_view_classifiers/multiview_classifiers"]) + pkgutil.iter_modules(multiview_classifiers.__path__) if not isPackage] else: benchmark["multiview"] = multiview_algos @@ -500,23 +498,23 @@ def benchmark_init(directory, classification_indices, labels, labels_dictionary, """ logging.debug("Start:\t Benchmark initialization") - if not os.path.exists(os.path.dirname(directory + "train_labels.csv")): + if not os.path.exists(os.path.dirname(os.path.join(directory, "train_labels.csv"))): try: - os.makedirs(os.path.dirname(directory + "train_labels.csv")) + os.makedirs(os.path.dirname(os.path.join(directory, "train_labels.csv"))) except OSError as exc: if exc.errno != errno.EEXIST: raise train_indices = classification_indices[0] train_labels = labels[train_indices] - np.savetxt(directory + "train_labels.csv", train_labels, delimiter=",") - np.savetxt(directory + "train_indices.csv", classification_indices[0], + np.savetxt(os.path.join(directory, "train_labels.csv"), train_labels, delimiter=",") + np.savetxt(os.path.join(directory, "train_indices.csv"), classification_indices[0], delimiter=",") results_monoview = [] folds = k_folds.split(np.arange(len(train_labels)), train_labels) min_fold_len = int(len(train_labels) / k_folds.n_splits) for fold_index, (train_cv_indices, test_cv_indices) in enumerate(folds): - file_name = directory + "/folds/test_labels_fold_" + str( - fold_index) + ".csv" + file_name = os.path.join(directory, "folds", "test_labels_fold_" + str( + fold_index) + ".csv") if not os.path.exists(os.path.dirname(file_name)): try: os.makedirs(os.path.dirname(file_name)) @@ -784,7 +782,7 @@ def exec_classif(arguments): """Main function to execute the benchmark""" start = time.time() args = execution.parse_the_args(arguments) - args = configuration.get_the_args(args.path_config) + args = configuration.get_the_args(args.config_path) os.nice(args["Base"]["nice"]) nb_cores = args["Base"]["nb_cores"] if nb_cores == 1: @@ -845,7 +843,7 @@ def exec_classif(arguments): if metrics == [["all"]]: metrics_names = [name for _, name, isPackage in pkgutil.iter_modules( - ['./mono_multi_view_classifiers/metrics']) if + [os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'metrics')]) if not isPackage and 
name not in ["framework", "log_loss", "matthews_corrcoef", "roc_auc_score"]] diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py index faecdbc2..e5d00b67 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/analyze_result.py @@ -34,7 +34,7 @@ def getDBConfigString(name, feat, classification_indices, shape, def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, - directory, y_test): + output_file_name, y_test): classifierConfigString = "Classifier configuration : \n" classifierConfigString += "\t- " + classifier.getConfig()[5:] + "\n" classifierConfigString += "\t- Executed on " + str(nbCores) + " core(s) \n" @@ -42,7 +42,7 @@ def getClassifierConfigString(gridSearch, nbCores, nIter, clKWARGS, classifier, classifierConfigString += "\t- Got configuration using randomized search with " + str( nIter) + " iterations \n" classifierConfigString += "\n\n" - classifierInterpretString = classifier.getInterpret(directory, y_test) + classifierInterpretString = classifier.getInterpret(output_file_name, y_test) return classifierConfigString, classifierInterpretString @@ -66,7 +66,7 @@ def getMetricScore(metric, y_train, y_train_pred, y_test, y_test_pred): def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics_list, nIter, feat, CL_type, clKWARGS, classLabelsNames, shape, y_train, y_train_pred, y_test, y_test_pred, time, - random_state, classifier, directory): + random_state, classifier, output_file_name): metricsScores = {} metricModule = getattr(metrics, metrics_list[0][0]) trainScore = metricModule.score(y_train, y_train_pred) @@ -78,7 +78,7 @@ def execute(name, learningRate, KFolds, nbCores, gridSearch, metrics_list, nIter stringAnalysis += getDBConfigString(name, feat, learningRate, shape, classLabelsNames, KFolds) classifierConfigString, classifierIntepretString = getClassifierConfigString( - gridSearch, nbCores, nIter, clKWARGS, classifier, directory, y_test) + gridSearch, nbCores, nIter, clKWARGS, classifier, output_file_name, y_test) stringAnalysis += classifierConfigString for metric in metrics_list: metricString, metricScore = getMetricScore(metric, y_train, diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index 8e6584ef..9b5490f2 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -154,9 +154,9 @@ def initConstants(args, X, classificationIndices, labels_names, labelsString = "-".join(labels_names) CL_type_string = CL_type timestr = time.strftime("%Y_%m_%d-%H_%M_%S") - outputFileName = directory + CL_type_string + "/" + view_name + "/" + timestr + "-results-" + CL_type_string + "-" + labelsString + \ + outputFileName = os.path.join(directory, CL_type_string, view_name, timestr + "-results-" + CL_type_string + "-" + labelsString + \ '-learnRate_{0:.2f}'.format( - learningRate) + '-' + name + "-" + view_name + "-" + learningRate) + '-' + name + "-" + view_name + "-") if not os.path.exists(os.path.dirname(outputFileName)): try: os.makedirs(os.path.dirname(outputFileName)) diff --git a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py 
b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py index ad867f07..47c983c7 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview_classifiers/svm_linear.py @@ -1,3 +1,5 @@ +import numpy as np + from multiview_platform.mono_multi_view_classifiers.monoview_classifiers.additions.SVCClassifier import SVCClassifier from ..monoview.monoview_utils import CustomUniform, BaseMonoviewClassifier @@ -36,7 +38,6 @@ class SVMLinear(SVCClassifier, BaseMonoviewClassifier): def getInterpret(self, directory, y_test): interpret_string = "" - import numpy as np self.feature_importances_ = (self.coef_/np.sum(self.coef_)).reshape((self.coef_.shape[1],)) return interpret_string diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py index 761a970d..7c2d3944 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/exec_multiview.py @@ -102,9 +102,9 @@ def save_results(classifier, labels_dictionary, string_analysis, views, classifi labels_string = "-".join(labels_set) timestr = time.strftime("%Y_%m_%d-%H_%M_%S") cl_type_string = classifier.short_name - output_file_name = directory + "/" + cl_type_string + "/" + timestr + \ - "-results-" + cl_type_string + "-" + views_string + '-' + labels_string + \ - '-learnRate_{0:.2f}'.format(learning_rate) + '-' + name + output_file_name = os.path.join(directory, cl_type_string, + timestr + "-results-" + cl_type_string + "-" + views_string + '-' + labels_string + \ + '-learnRate_{0:.2f}'.format(learning_rate) + '-' + name) if not os.path.exists(os.path.dirname(output_file_name)): try: os.makedirs(os.path.dirname(output_file_name)) @@ -400,12 +400,12 @@ if __name__ == "__main__": logfilename = "gen a good logfilename" - logfile = directory + logfilename + logfile = os.path.join(directory, logfilename) if os.path.isfile(logfile + ".log"): for i in range(1, 20): testFileName = logfilename + "-" + str(i) + ".log" - if not os.path.isfile(directory + testFileName): - logfile = directory + testFileName + if not os.path.isfile(os.path.join(directory, testFileName)): + logfile = os.path.join(directory, testFileName) break else: logfile += ".log" diff --git a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py index fcc0ec1a..5fe42d49 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview/multiview_utils.py @@ -44,7 +44,7 @@ class BaseMultiviewClassifier(BaseEstimator, ClassifierMixin): def __init__(self, random_state): self.random_state = random_state - self.short_name = self.__class__.__name__ + self.short_name = self.__module__.split(".")[-1] self.weird_strings = {} def gen_best_params(self, detector): diff --git a/multiview_platform/mono_multi_view_classifiers/result_analysis.py b/multiview_platform/mono_multi_view_classifiers/result_analysis.py index 02514a8a..1dfd5460 100644 --- a/multiview_platform/mono_multi_view_classifiers/result_analysis.py +++ b/multiview_platform/mono_multi_view_classifiers/result_analysis.py @@ -58,9 +58,9 @@ def plot_results_noise(directory, noise_results, metric_to_plot, name, width=0.1 plt.title(name) plt.xticks(noise_levels) 
plt.xlabel("Noise level") - plt.savefig(directory+name+"_noise_analysis.png") + plt.savefig(os.path.join(directory, name+"_noise_analysis.png")) plt.close() - df.to_csv(directory+name+"_noise_analysis.csv") + df.to_csv(os.path.join(directory, name+"_noise_analysis.csv")) def plot_metric_scores(train_scores, test_scores, names, nb_results, metric_name, diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 8bec9645..649dfd29 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -21,7 +21,7 @@ def parse_the_args(arguments): fromfile_prefix_chars='@') groupStandard = parser.add_argument_group('Standard arguments') - groupStandard.add_argument('--path_config', metavar='STRING', action='store', + groupStandard.add_argument('--config_path', metavar='STRING', action='store', help='Path to the hdf5 dataset or database ' 'folder (default: %(default)s)', default='../config_files/config.yml') @@ -53,6 +53,7 @@ def init_random_state(random_state_arg, directory): random_state : numpy.random.RandomState object This random state will be used all along the benchmark . """ + if random_state_arg is None: random_state = np.random.RandomState(random_state_arg) else: @@ -63,7 +64,7 @@ def init_random_state(random_state_arg, directory): file_name = random_state_arg with open(file_name, 'rb') as handle: random_state = pickle.load(handle) - with open(directory + "random_state.pickle", "wb") as handle: + with open(os.path.join(directory, "random_state.pickle"), "wb") as handle: pickle.dump(random_state, handle) return random_state @@ -152,6 +153,7 @@ def init_log_file(name, views, cl_type, log, debug, label, if views is None: views=[] noise_string = "n_"+str(int(noise_std*100)) + result_directory = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), result_directory) if debug: result_directory = os.path.join(result_directory, name, noise_string, "debug_started_" + time.strftime("%Y_%m_%d-%H_%M_%S") + "_" + label) @@ -299,7 +301,7 @@ def gen_direcorties_names(directory, stats_iter): if stats_iter > 1: directories = [] for i in range(stats_iter): - directories.append(directory + "iter_" + str(i + 1) + "/") + directories.append(os.path.join(directory, "iter_" + str(i + 1))) else: directories = [directory] return directories @@ -308,9 +310,9 @@ def gen_direcorties_names(directory, stats_iter): def find_dataset_names(path, type, names): """This function goal is to browse the dataset directory and extrats all the needed dataset names.""" - + module_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) available_file_names = [file_name.strip().split(".")[0] - for file_name in os.listdir(path) + for file_name in os.listdir(os.path.join(module_path, path)) if file_name.endswith(type)] if names == ["all"]: return available_file_names @@ -379,10 +381,10 @@ def gen_argument_dictionaries(labels_dictionary, directories, multiclass_labels, "labels_dictionary": {0: labels_dictionary[labels_combination[0]], 1: labels_dictionary[ labels_combination[1]]}, - "directory": directories[iter_index] + + "directory": os.path.join(directories[iter_index], labels_dictionary[labels_combination[0]] + "-vs-" + - labels_dictionary[labels_combination[1]] + "/", + labels_dictionary[labels_combination[1]]), "classification_indices": [ 
indices_multiclass[combination_index][0][iter_index], indices_multiclass[combination_index][1][iter_index], diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 11e7bd3b..5d9c9602 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -38,9 +38,9 @@ def get_plausible_db_hdf5(features, path, file_name, nb_class=3, nb_features=10): """Used to generate a plausible dataset to test the algorithms""" - if not os.path.exists(os.path.dirname(path + "plausible.hdf5")): + if not os.path.exists(os.path.dirname(os.path.join(path, "plausible.hdf5"))): try: - os.makedirs(os.path.dirname(path + "plausible.hdf5")) + os.makedirs(os.path.dirname(os.path.join(path, "plausible.hdf5"))) except OSError as exc: if exc.errno != errno.EEXIST: raise @@ -133,14 +133,14 @@ def get_classic_db_hdf5(views, path_f, name_DB, nb_class, asked_labels_names, path_for_new="../data/"): """Used to load a hdf5 database""" if full: - dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset_file = h5py.File(os.path.join(path_f, name_DB + ".hdf5"), "r") dataset = Dataset(hdf5_file=dataset_file) dataset_name = name_DB labels_dictionary = dict((label_index, label_name) for label_index, label_name in enumerate(dataset.get_label_names())) else: - dataset_file = h5py.File(path_f + name_DB + ".hdf5", "r") + dataset_file = h5py.File(os.path.join(path_f, name_DB + ".hdf5"), "r") dataset = Dataset(hdf5_file=dataset_file) labels_dictionary = dataset.select_views_and_labels(nb_labels=nb_class, selected_label_names=asked_labels_names, diff --git a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py index 8b245793..f540dab6 100644 --- a/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py +++ b/multiview_platform/tests/test_mono_view/test_ExecClassifMonoView.py @@ -31,7 +31,7 @@ class Test_initConstants(unittest.TestCase): np.array([1, 3, 5, 7, 9])] cls.labels_names = ["test_true", "test_false"] cls.name = "test" - cls.directory = tmp_path+"test_dir/" + cls.directory = os.path.join(tmp_path, "test_dir/") def test_simple(cls): kwargs, \ diff --git a/multiview_platform/versions.py b/multiview_platform/versions.py index 23661197..b60d9854 100644 --- a/multiview_platform/versions.py +++ b/multiview_platform/versions.py @@ -8,88 +8,88 @@ def test_versions(): is_up_to_date = True to_install = [] - try: - import sys - except ImportError: - raise - - try: - import cvxopt - except ImportError: - is_up_to_date = False - to_install.append("cvxopt") - - try: - import pyscm - except ImportError: - is_up_to_date = False - to_install.append("pyscm") - - try: - import numpy - except ImportError: - is_up_to_date = False - to_install.append("numpy") - - try: - import scipy - except ImportError: - is_up_to_date = False - to_install.append("scipy") - - try: - import matplotlib - except ImportError: - is_up_to_date = False - to_install.append("matplotlib") - - try: - import sklearn - except ImportError: - is_up_to_date = False - to_install.append("sklearn") - - try: - import logging - except ImportError: - is_up_to_date = False - to_install.append("logging") - - try: - import joblib - except ImportError: - is_up_to_date = False - to_install.append("joblib") - - try: - import argparse - except ImportError: - is_up_to_date = False - 
to_install.append("argparse") - - try: - import h5py # - except ImportError: - is_up_to_date = False - to_install.append("h5py") - # try: - # import graphviz # + # import sys + # except ImportError: + # raise + # + # try: + # import cvxopt # except ImportError: # is_up_to_date = False - # to_install.append("graphviz") - - try: - import pickle # - except ImportError: - is_up_to_date = False - to_install.append("pickle") - - if not is_up_to_date: - print( - "You can't run at the moment, please install the following modules : \n" + "\n".join( - to_install)) - quit() + # to_install.append("cvxopt") + # + # try: + # import pyscm + # except ImportError: + # is_up_to_date = False + # to_install.append("pyscm") + # + # try: + # import numpy + # except ImportError: + # is_up_to_date = False + # to_install.append("numpy") + # + # try: + # import scipy + # except ImportError: + # is_up_to_date = False + # to_install.append("scipy") + # + # try: + # import matplotlib + # except ImportError: + # is_up_to_date = False + # to_install.append("matplotlib") + # + # try: + # import sklearn + # except ImportError: + # is_up_to_date = False + # to_install.append("sklearn") + # + # try: + # import logging + # except ImportError: + # is_up_to_date = False + # to_install.append("logging") + # + # try: + # import joblib + # except ImportError: + # is_up_to_date = False + # to_install.append("joblib") + # + # try: + # import argparse + # except ImportError: + # is_up_to_date = False + # to_install.append("argparse") + # + # try: + # import h5py # + # except ImportError: + # is_up_to_date = False + # to_install.append("h5py") + # + # # try: + # # import graphviz # + # # except ImportError: + # # is_up_to_date = False + # # to_install.append("graphviz") + # + # try: + # import pickle # + # except ImportError: + # is_up_to_date = False + # to_install.append("pickle") + # + # if not is_up_to_date: + # print( + # "You can't run at the moment, please install the following modules : \n" + "\n".join( + # to_install)) + # quit() if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 9802205d..940b0782 100755 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ joblib==0.13.2 kiwisolver==1.1.0 numpy==1.16.4 pyparsing==2.4.0 --e git+https://github.com/aldro61/pyscm.git#egg=pyscm python-dateutil==2.8.0 scikit-learn==0.19.0 scipy==1.3.0 @@ -16,3 +15,4 @@ docutils==0.12 pyyaml==3.12 cvxopt==1.2.0 plotly==4.2.1 +matplotlib==3.1.1 diff --git a/setup.py b/setup.py index 885aa563..3a952d19 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +#Extracting requrements from requirements.txt +with open('requirements.txt') as f: + requirements = f.read().splitlines() # from Cython.Build import cythonize from setuptools import setup, find_packages @@ -52,10 +55,11 @@ def setup_package(): # dependency_links=['https://github.com/aldro61/pyscm.git#egg=pyscm'], # Une url qui pointe vers la page officielle de votre lib url='http://github.com/babau1/multiview-machine-learning-omis/', - install_requires=['numpy>=1.16', 'scipy>=0.16','scikit-learn==0.19', - 'matplotlib', 'h5py', 'joblib', - 'pandas', 'm2r', 'pyyaml', 'pyscm @ git+https://github.com/aldro61/pyscm', - 'cvxopt', 'plotly==4.2.1'], + install_requires=requirements, + # install_requires=['numpy>=1.16', 'scipy>=0.16','scikit-learn==0.19', + # 'matplotlib', 'h5py', 'joblib', + # 'pandas', 'm2r', 'pyyaml', 'pyscm @ git+https://github.com/aldro61/pyscm', + # 'cvxopt', 'plotly==4.2.1'], # Il est d'usage de mettre quelques metadata à 
propos de sa lib # Pour que les robots puissent facilement la classer. -- GitLab
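The `exec_classif` and `utils` modules above rely on two idioms: building output locations with `os.path.join`, and discovering classifier modules through the package's own `__path__` with `pkgutil.iter_modules`, so the benchmark can be launched from any working directory (for instance a Python console). A minimal, self-contained sketch of both idioms, assuming the package is installed as described in the README (the result path is illustrative):

```python
import os
import pkgutil

from multiview_platform.mono_multi_view_classifiers import monoview_classifiers

# Monoview classifier modules are found through the package's __path__,
# not through a path relative to the current working directory.
monoview_names = [name for _, name, is_package
                  in pkgutil.iter_modules(monoview_classifiers.__path__)
                  if not is_package]

# Output locations are assembled with os.path.join rather than string
# concatenation, which keeps directory separators consistent.
result_file = os.path.join("results", "example_1", "train_labels.csv")

print(monoview_names)
print(result_file)
```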