Commit 5cc4a555 authored by Baptiste Bauvin

name changed

parent 4d630a8a
Showing changed files with 5 additions and 1532 deletions
@@ -23,7 +23,7 @@ doc:
- export LC_ALL=$(locale -a | grep en_US)
- export LANG=$(locale -a | grep en_US)
- pip3 install -e . --no-deps
- - sphinx-apidoc -o docs/source multiview_platform
+ - sphinx-apidoc -o docs/source summit
- cd docs/source
- sphinx-build -b html . ../build
- cd ../..
@@ -45,7 +45,7 @@ pages:
- export LANG=$(locale -a | grep en_US)
- pip3 install -e . --no-deps
- pytest-3
- - sphinx-apidoc -o docs/source multiview_platform
+ - sphinx-apidoc -o docs/source summit
- cd docs/source
- sphinx-build -b html . ../build
- cd ../..
......
@@ -71,7 +71,7 @@ to read it carefully before playing around with the parameters.
You can create your own configuration file. In order to run the platform with it, run:
```python
-from multiview_platform.execute import execute
+from summit.execute import execute
execute(config_path="/absolute/path/to/your/config/file")
```
......
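For context, the entry point shown in the README hunk above also resolves the bundled example configurations by name (see the `execute.py` listing further down in this diff). A minimal usage sketch, assuming the post-rename `summit` package name:
```python
# Sketch only: two ways of calling the execute() entry point from the README
# hunk above (import path is the post-rename "summit" package).
from summit.execute import execute

# Either with a custom configuration file:
execute(config_path="/absolute/path/to/your/config/file")

# ... or with one of the bundled example configurations, resolved by name
# inside execute() (see execute.py further down in this diff):
execute(config_path="example 1")
```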
@@ -22,11 +22,11 @@ import os
import sys
sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(0, os.path.abspath('../../multiview_platform'))
+sys.path.insert(0, os.path.abspath('../../summit'))
sys.path.insert(0, os.path.abspath('../..'))
file_loc = os.path.split(__file__)[0]
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(file_loc), '.')))
-# import multiview_platform
+# import summit
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
......
"""This is a test docstring to test stuff"""
__version__ = "0.0.0.0"
from . import mono_multi_view_classifiers, execute
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "digits_doc"
# A label for the result directory
label: "example_0"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_0/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# The ratio of the number of test examples to the number of train examples
split: 0.25
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: "None"
# The number of iterations in the hyper-parameter optimization process
hps_args: {}
### Configuring the hyper-parameters for the classifiers
decision_tree:
  max_depth: 3
weighted_linear_early_fusion:
  monoview_classifier_name: "decision_tree"
  monoview_classifier_config:
    decision_tree:
      max_depth: 6
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 3
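A configuration like the one above is plain YAML, so it can be inspected programmatically before launching a benchmark. A minimal sketch, assuming PyYAML is installed and that the file is the bundled `examples/config_files/config_example_0.yml` referenced in the `execute.py` listing further down in this diff:
```python
# Sketch only: load and inspect one of the bundled example configurations.
# The relative path is an assumption based on the paths used in execute.py.
import yaml

with open("examples/config_files/config_example_0.yml") as config_file:
    config = yaml.safe_load(config_file)

print(config["name"])            # "digits_doc"
print(config["algos_monoview"])  # ["decision_tree"]
print(config["decision_tree"])   # {"max_depth": 3}
```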
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_1"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/mkljlj"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_1/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# The ratio of the number of test examples to the number of train examples
split: 0.35
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion",]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: "None"
# The number of iterations in the hyper-parameter optimization process
hps_args: {}
### Configuring the hyper-parameters for the classifiers
decision_tree:
  max_depth: 3
weighted_linear_early_fusion:
  monoview_classifier_name: "decision_tree"
  monoview_classifier_config:
    decision_tree:
      max_depth: 6
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 3
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_2_1_1"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_2_1_1/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.8
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion", ]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: None
# The number of iterations in the hyper-parameter optimization process
hps_args: {}
decision_tree:
  max_depth: 3
adaboost:
  base_estimator: "DecisionTreeClassifier"
  n_estimators: 50
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 2
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_2_1_2"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_2_1_2/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.2
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion", ]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: None
# The number of iterations in the hyper-parameter optimization process
hps_args: {}
decision_tree:
  max_depth: 3
adaboost:
  base_estimator: "DecisionTreeClassifier"
  n_estimators: 50
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 2
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_2_2_1"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_2_2_1/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.8
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 5
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion", ]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: 'Random'
# The arguments of the hyper-parameter optimization process
hps_args:
  n_iter: 5
  equivalent_draws: True
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_2_3"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_2_3/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.8
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 5
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion", ]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: 'Grid'
# The arguments of the hyper-parameter optimization process
hps_args:
  decision_tree:
    max_depth: [1,2,3,4,5]
  adaboost:
    n_estimators: [10,15,20,25]
  weighted_linear_late_fusion:
    classifiers_names:
      - ["decision_tree", "decision_tree", "decision_tree", "decision_tree"]
      - ["adaboost", "adaboost", "adaboost", "adaboost",]
    classifier_configs:
      - decision_tree:
          max_depth: 3
        adaboost:
          n_estimators: 10
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
# The base configuration of the benchmark
# Enable logging
log: True
# The name of each dataset in the directory on which the benchmark should be run
name: "doc_summit"
# A label for the result directory
label: "example_3"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "examples/results/example_3/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, it
# stops the benchmark and raises the error
track_tracebacks: True
# All the classification-related configuration options
# If the dataset is multiclass, will use this multiclass-to-biclass method
multiclass_method: "oneVersusOne"
# The ratio of the number of test examples to the number of train examples
split: 0.8
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 5
# The number of classes to select in the dataset
nb_class: 2
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree", "adaboost", ]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_late_fusion", ]
# The number of times the benchmark is repeated with different train/test
# splits, to have more statistically significant results
stats_iter: 5
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: 'None'
# The number of iterations in the hyper-parameter optimization process
hps_args: {}
decision_tree:
  max_depth: 3
adaboost:
  base_estimator: "DecisionTreeClassifier"
  n_estimators: 10
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 2
# The following arguments are classifier-specific, and are documented in each
# of the corresponding modules.
# In order to run multiple sets of parameters, use multiple values in the
# following lists, and set hps_type to None.
File deleted
File deleted
"""This is the execution module, used to execute the code"""
import os
def execute(config_path=None): # pragma: no cover
import sys
from multiview_platform.mono_multi_view_classifiers import exec_classif
if config_path is None:
exec_classif.exec_classif(sys.argv[1:])
else:
if config_path == "example 0":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_0.yml")
elif config_path == "example 1":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_1.yml")
elif config_path == "example 2.1.1":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_1_1.yml")
elif config_path == "example 2.1.2":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_1_2.yml")
elif config_path == "example 2.2":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_2.yml")
elif config_path == "example 2.3":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_2_3.yml")
elif config_path == "example 3":
config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "examples", "config_files", "config_example_3.yml")
exec_classif.exec_classif(["--config_path", config_path])
if __name__ == "__main__":
execute()
from . import exec_classif, result_analysis, metrics, monoview_classifiers, \
monoview, multiview, utils, multiview_classifiers
__all__ = ['metrics', 'monoview', 'monoview_classifiers', 'multiview', 'utils']
This diff is collapsed.
__version__ = "0.0.0.0"
"""
To be able to add another metric to the benchmark you must :
Create a .py file named after the metric
Define a score function
Input :
y_true : np array with the real labels
y_pred : np array with the predicted labels
kwargs : every argument that is specific to the metric
Returns:
score : the metric's score (float)
Define a get_scorer function
Input :
kwargs : every argument that is specific to the metric
Returns :
scorer : an object similar to an sk-learn scorer
Define a getConfig function
Input :
kwargs : every argument that is specific to the metric
Output :
config_string : A string that gives the name of the metric and explains how it is configured. Must end by
(lower is better) or (higher is better) to be able to analyze the preds
"""
import os

# Automatically import every metric module of this package
# (every .py file except __init__.py).
for module in os.listdir(os.path.dirname(os.path.realpath(__file__))):
    if module in ['__init__.py'] or module[-3:] != '.py':
        continue
    __import__(module[:-3], locals(), globals(), [], 1)
del os
"""Functions :
score: to get the accuracy score
get_scorer: returns a sklearn scorer for grid search
"""
import warnings
from sklearn.metrics import accuracy_score as metric
from sklearn.metrics import make_scorer
warnings.warn("the accuracy_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=False, **kwargs):
"""Arguments:
y_true: real labels
y_pred: predicted labels
Keyword Arguments:
"0": weights to compute accuracy
Returns:
Weighted accuracy score for y_true, y_pred"""
score = metric(y_true, y_pred, **kwargs)
return score
def get_scorer(**kwargs):
"""Keyword Arguments:
"0": weights to compute accuracy
Returns:
A weighted sklearn scorer for accuracy"""
return make_scorer(metric, greater_is_better=True,
**kwargs)
def get_config(**kwargs):
config_string = "Accuracy score using {}, (higher is better)".format(kwargs)
return config_string
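As a usage note for the module above, `score` simply wraps `sklearn.metrics.accuracy_score`, and `get_scorer` returns a scorer callable on a fitted estimator. A minimal sketch, assuming the pre-rename import path used elsewhere in this diff:
```python
# Sketch only: exercising the accuracy_score wrapper shown above.
# The import path is the pre-rename one used in execute.py in this diff.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from multiview_platform.mono_multi_view_classifiers.metrics.accuracy_score import (
    score, get_scorer)

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

print(score(y, np.array([0, 1, 1, 1])))       # 0.75
print(score(y, np.array([0, 1, 1, 1]),
            sample_weight=[2, 1, 1, 1]))      # 0.8 with these weights

clf = DecisionTreeClassifier(max_depth=1).fit(X, y)
scorer = get_scorer()
print(scorer(clf, X, y))                      # 1.0 on the training data
```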
"""Functions :
score: to get the f1 score
get_scorer: returns a sklearn scorer for grid search
"""
import warnings
from sklearn.metrics import f1_score as metric
from sklearn.metrics import make_scorer
warnings.warn("the f1_score module is deprecated", DeprecationWarning,
stacklevel=2)
# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype" # Production, Development, Prototype
def score(y_true, y_pred, multiclass=True, average='micro', **kwargs):
score = metric(y_true, y_pred, average=average, **kwargs)
return score
def get_scorer(average="micro", **kwargs):
return make_scorer(metric, greater_is_better=True, average=average,
**kwargs)
def get_config(average="micro", **kwargs, ):
config_string = "F1 score using average: {}, {} (higher is better)".format(
average, kwargs)
return config_string
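Following the protocol described in the metrics package docstring above, and mirroring the two modules just shown, a new metric module could look like the sketch below (a hypothetical `balanced_accuracy.py`, not part of this commit):
```python
# Hypothetical balanced_accuracy.py metric module, sketched after the
# accuracy_score and f1_score modules above; not part of this commit.
from sklearn.metrics import balanced_accuracy_score as metric
from sklearn.metrics import make_scorer


def score(y_true, y_pred, multiclass=False, **kwargs):
    """Return the balanced accuracy of y_pred against y_true."""
    return metric(y_true, y_pred, **kwargs)


def get_scorer(**kwargs):
    """Return an sklearn scorer usable by the hyper-parameter search."""
    return make_scorer(metric, greater_is_better=True, **kwargs)


def get_config(**kwargs):
    """Describe the metric; the string ends with the comparison direction."""
    return "Balanced accuracy using {}, (higher is better)".format(kwargs)
```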