Commit 113d5d88 authored by Baptiste Bauvin's avatar Baptiste Bauvin

Frgot to add files

parent e9a4a376
Showing with 674 additions and 0 deletions
File added
import warnings

from sklearn.metrics import jaccard_score as metric
from sklearn.metrics import make_scorer

warnings.warn("the jaccard_similarity_score module is deprecated",
              DeprecationWarning,
              stacklevel=2)

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def score(y_true, y_pred, multiclass=False, **kwargs):
    """Return the Jaccard score of the predictions."""
    return metric(y_true, y_pred, **kwargs)


def get_scorer(**kwargs):
    """Return a scikit-learn scorer object based on the Jaccard score."""
    return make_scorer(metric, greater_is_better=True,
                       **kwargs)


def get_config(**kwargs):
    """Return a string describing the metric configuration."""
    return "Jaccard_similarity score using {} (higher is better)".format(
        kwargs)
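A minimal, self-contained usage sketch of the metric interface defined above; it relies only on the scikit-learn functions the module wraps, and the example labels are made up:

from sklearn.metrics import jaccard_score, make_scorer

y_true = [0, 1, 1, 0]  # hypothetical ground-truth labels
y_pred = [0, 1, 0, 0]  # hypothetical predictions

print(jaccard_score(y_true, y_pred))  # 0.5 here: |intersection| / |union| of the positive class
scorer = make_scorer(jaccard_score, greater_is_better=True)  # usable in cross_val_score or GridSearchCV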
# The base configuration of the benchmark
# Enable logging
log: False
# The name of each dataset in the directory on which the benchmark should be run
name: "digits_doc"
# A label for the result directory
label: "example_0"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "../examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "tmp_tests/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, the
# benchmark stops and the error is raised
track_tracebacks: True
# All the classification-related configuration options
# The ratio of the number of test examples to the number of train examples
split: 0.25
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_early_fusion",]
# The number of times the benchmark is repeated with different train/test
# splits, to obtain more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: "Random"
# The arguments of the hyper-parameter optimization process, here the number of iterations
hps_args:
  n_iter: 2
  equivalent_draws: False
### Configuring the hyper-parameters for the classifiers
decision_tree:
  max_depth: 3
weighted_linear_early_fusion:
  monoview_classifier_name: "decision_tree"
  monoview_classifier_config:
    decision_tree:
      max_depth: 6
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 3
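How the platform itself parses these files is not part of this commit; as a rough, generic sketch, a configuration like the one above can be loaded into a plain dictionary with PyYAML (the file name is hypothetical):

import yaml

# Hypothetical path; any of the configuration files in this commit would load the same way.
with open("config_example_0.yml") as config_file:
    config = yaml.safe_load(config_file)

print(config["algos_monoview"])                # ['decision_tree']
print(config["weighted_linear_early_fusion"])  # nested classifier configuration dictionary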
# The base configuration of the benchmark
# Enable logging
log: False
# The name of each dataset in the directory on which the benchmark should be run
name: "digits_doc"
# A label for the result directory
label: "example_0"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "../examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "tmp_tests/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, the
# benchmark stops and the error is raised
track_tracebacks: True
# All the classification-related configuration options
# The ratio of the number of test examples to the number of train examples
split: 0.25
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_early_fusion",]
# The number of times the benchmark is repeated with different train/test
# splits, to obtain more statistically significant results
stats_iter: 2
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: "None"
# The arguments of the hyper-parameter optimization process
hps_args: {}
### Configuring the hyper-parameters for the classifiers
decision_tree:
  max_depth: 3
weighted_linear_early_fusion:
  monoview_classifier_name: "decision_tree"
  monoview_classifier_config:
    decision_tree:
      max_depth: 6
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 3
# The base configuration of the benchmark
# Enable logging
log: False
# The name of each dataset in the directory on which the benchmark should be run
name: "digits_doc"
# A label for the result directory
label: "example_0"
# The type of dataset, currently supported ".hdf5", and ".csv"
file_type: ".hdf5"
# The views to use in the benchmark, an empty value will result in using all the views
views:
# The path to the directory where the datasets are stored, an absolute path is advised
pathf: "../examples/data/"
# The niceness of the processes, useful to lower their priority
nice: 0
# The random state of the benchmark, useful for reproducibility
random_state: 42
# The number of parallel computing threads
nb_cores: 1
# Used to run the benchmark on the full dataset
full: True
# Used to be able to run more than one benchmark per minute
debug: False
# The directory in which the results will be stored, an absolute path is advised
res_dir: "tmp_tests/"
# If an error occurs in a classifier and track_tracebacks is set to True, the
# benchmark saves the traceback and continues; if it is set to False, the
# benchmark stops and the error is raised
track_tracebacks: True
# All the classification-related configuration options
# The ratio of the number of test examples to the number of train examples
split: 0.25
# The number of folds in the cross-validation process when hyper-parameter optimization is performed
nb_folds: 2
# The number of classes to select in the dataset
nb_class:
# The name of the classes to select in the dataset
classes:
# The type of algorithms to run during the benchmark (monoview and/or multiview)
type: ["monoview","multiview"]
# The name of the monoview algorithms to run, ["all"] to run all the available classifiers
algos_monoview: ["decision_tree"]
# The names of the multiview algorithms to run, ["all"] to run all the available classifiers
algos_multiview: ["weighted_linear_early_fusion", "weighted_linear_late_fusion",]
# The number of times the benchmark is repeated with different train/test
# splits, to obtain more statistically significant results
stats_iter: 1
# The metrics that will be used in the result analysis
metrics:
  accuracy_score: {}
  f1_score:
    average: "micro"
# The metric that will be used in the hyper-parameter optimization process
metric_princ: "accuracy_score"
# The type of hyper-parameter optimization method
hps_type: "None"
# The arguments of the hyper-parameter optimization process
hps_args: {}
### Configuring the hyper-parameters for the classifiers
decision_tree:
  max_depth: 3
weighted_linear_early_fusion:
  monoview_classifier_name: "decision_tree"
  monoview_classifier_config:
    decision_tree:
      max_depth: 6
weighted_linear_late_fusion:
  classifiers_names: "decision_tree"
  classifier_configs:
    decision_tree:
      max_depth: 3
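As an illustration only (the platform's own wrapper classes are not part of this commit), the classifier sections above are plain nested mappings; once loaded, the decision_tree block could be turned into a scikit-learn estimator as follows:

from sklearn.tree import DecisionTreeClassifier

# Mirrors the "decision_tree: max_depth: 3" block from the configuration above.
decision_tree_config = {"max_depth": 3}
clf = DecisionTreeClassifier(**decision_tree_config)

# The fusion sections nest a monoview classifier configuration in the same way, e.g.
# {"monoview_classifier_name": "decision_tree",
#  "monoview_classifier_config": {"decision_tree": {"max_depth": 6}}}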
import os
import pkgutil
import unittest

from sklearn.metrics._scorer import _BaseScorer

import multiview_platform.mono_multi_view_classifiers.metrics as metrics

# Test that each metric module exposes the expected functions and that they
# return the expected output types for valid inputs.
# The same should be done for the monoview and multiview classifiers.


class Test_metric(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.test = "a"

    def test_simple(self):
        pkgpath = os.path.dirname(metrics.__file__)
        for _, metric, _ in pkgutil.iter_modules([pkgpath]):
            module = getattr(metrics, metric)
            self.assertTrue(hasattr(module, "score"))
            self.assertTrue(isinstance(module.score([1, 0], [1, 0]), float))
            self.assertTrue(hasattr(module, "get_scorer"))
            self.assertTrue(isinstance(module.get_scorer(), _BaseScorer))
            self.assertTrue(hasattr(module, "get_config"))
            self.assertTrue(isinstance(module.get_config(), str))
import os
import unittest

import h5py
import numpy as np
from sklearn.model_selection import StratifiedKFold

from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset
from multiview_platform.mono_multi_view_classifiers.monoview import exec_classif_mono_view
from multiview_platform.mono_multi_view_classifiers.monoview_classifiers import decision_tree


class Test_initConstants(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        rm_tmp()
        os.mkdir(tmp_path)
        cls.view_name = "test_dataset"
        cls.datasetFile = h5py.File(
            tmp_path + "test.hdf5", "w")
        cls.random_state = np.random.RandomState(42)
        cls.args = {"classifier_name": "test_clf"}
        cls.X_value = cls.random_state.randint(0, 500, (10, 20))
        cls.X = cls.datasetFile.create_dataset("View0", data=cls.X_value)
        cls.X.attrs["name"] = "test_dataset"
        cls.X.attrs["sparse"] = False
        cls.classification_indices = [np.array([0, 2, 4, 6, 8]),
                                      np.array([1, 3, 5, 7, 9]),
                                      np.array([1, 3, 5, 7, 9])]
        cls.labels_names = ["test_true", "test_false"]
        cls.name = "test"
        cls.directory = os.path.join(tmp_path, "test_dir/")

    def test_simple(cls):
        kwargs, \
        t_start, \
        feat, \
        CL_type, \
        X, \
        learningRate, \
        labelsString, \
        output_file_name, \
        directory, \
        base_file_name = exec_classif_mono_view.init_constants(cls.args,
                                                               cls.X,
                                                               cls.classification_indices,
                                                               cls.labels_names,
                                                               cls.name,
                                                               cls.directory,
                                                               cls.view_name)
        cls.assertEqual(kwargs, cls.args)
        cls.assertEqual(feat, "test_dataset")
        cls.assertEqual(CL_type, "test_clf")
        np.testing.assert_array_equal(X, cls.X_value)
        cls.assertEqual(learningRate, 0.5)
        cls.assertEqual(labelsString, "test_true-test_false")
        # cls.assertEqual(output_file_name, "Code/tests/temp_tests/test_dir/test_clf/test_dataset/results-test_clf-test_true-test_false-learnRate0.5-test-test_dataset-")

    @classmethod
    def tearDownClass(cls):
        os.remove(tmp_path + "test.hdf5")
        os.rmdir(
            tmp_path + "test_dir/test_clf/test_dataset")
        os.rmdir(tmp_path + "test_dir/test_clf")
        os.rmdir(tmp_path + "test_dir")
        os.rmdir(tmp_path)


class Test_initTrainTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        rm_tmp()
        cls.random_state = np.random.RandomState(42)
        cls.X = cls.random_state.randint(0, 500, (10, 5))
        cls.Y = cls.random_state.randint(0, 2, 10)
        cls.classification_indices = [np.array([0, 2, 4, 6, 8]),
                                      np.array([1, 3, 5, 7, 9]),
                                      ]

    def test_simple(cls):
        X_train, y_train, X_test, y_test = exec_classif_mono_view.init_train_test(
            cls.X, cls.Y, cls.classification_indices)
        np.testing.assert_array_equal(X_train, np.array(
            [np.array([102, 435, 348, 270, 106]),
             np.array([466, 214, 330, 458, 87]),
             np.array([149, 308, 257, 343, 491]),
             np.array([276, 160, 459, 313, 21]),
             np.array([58, 169, 475, 187, 463])]))
        np.testing.assert_array_equal(X_test, np.array(
            [np.array([71, 188, 20, 102, 121]),
             np.array([372, 99, 359, 151, 130]),
             np.array([413, 293, 385, 191, 443]),
             np.array([252, 235, 344, 48, 474]),
             np.array([270, 189, 445, 174, 445])]))
        np.testing.assert_array_equal(y_train, np.array([0, 0, 1, 0, 0]))
        np.testing.assert_array_equal(y_test, np.array([1, 1, 0, 0, 0]))


class Test_getHPs(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        rm_tmp()
        os.mkdir(tmp_path)
        cls.classifierModule = decision_tree
        cls.hyper_param_search = "Random"
        cls.classifier_name = "decision_tree"
        cls.random_state = np.random.RandomState(42)
        cls.X = cls.random_state.randint(0, 10, size=(10, 5))
        cls.y = cls.random_state.randint(0, 2, size=10)
        cls.output_file_name = tmp_path
        cls.cv = StratifiedKFold(n_splits=2, random_state=cls.random_state,
                                 shuffle=True)
        cls.nb_cores = 1
        cls.metrics = {"accuracy_score*": {}}
        cls.kwargs = {"decision_tree": {"max_depth": 1,
                                        "criterion": "gini",
                                        "splitter": "best"}}
        cls.classifier_class_name = "DecisionTree"
        cls.hps_kwargs = {"n_iter": 2}

    @classmethod
    def tearDownClass(cls):
        for file_name in os.listdir(tmp_path):
            os.remove(
                os.path.join(tmp_path, file_name))
        os.rmdir(tmp_path)

    def test_simple(self):
        kwargs = exec_classif_mono_view.get_hyper_params(self.classifierModule,
                                                         self.hyper_param_search,
                                                         self.classifier_name,
                                                         self.classifier_class_name,
                                                         self.X,
                                                         self.y,
                                                         self.random_state,
                                                         self.output_file_name,
                                                         self.cv,
                                                         self.nb_cores,
                                                         self.metrics,
                                                         self.kwargs,
                                                         **self.hps_kwargs)


class Test_exec_monoview(unittest.TestCase):

    def test_simple(self):
        os.mkdir(tmp_path)
        out = exec_classif_mono_view.exec_monoview(tmp_path,
                                                   test_dataset.get_v(0),
                                                   test_dataset.get_labels(),
                                                   "test dataset",
                                                   ["yes", "no"],
                                                   [np.array([0, 1, 2, 4]), np.array([4])],
                                                   StratifiedKFold(n_splits=2),
                                                   1,
                                                   "",
                                                   "",
                                                   np.random.RandomState(42),
                                                   "Random",
                                                   n_iter=2,
                                                   **{"classifier_name": "decision_tree",
                                                      "view_index": 0,
                                                      "decision_tree": {}})
        rm_tmp()
# class Test_getKWARGS(unittest.TestCase):
#
#     @classmethod
#     def setUpClass(cls):
#         cls.classifierModule = None
#         cls.hyper_param_search = "None"
#         cls.nIter = 2
#         cls.CL_type = "string"
#         cls.X_train = np.zeros((10, 20))
#         cls.y_train = np.zeros((10))
#         cls.random_state = np.random.RandomState(42)
#         cls.outputFileName = "test_file"
#         cls.KFolds = None
#         cls.nbCores = 1
#         cls.metrics = {"accuracy_score": ""}
#         cls.kwargs = {}
#
#     def test_simple(cls):
#         clKWARGS = ExecClassifMonoView.getHPs(cls.classifierModule,
#                                               cls.hyper_param_search,
#                                               cls.nIter,
#                                               cls.CL_type,
#                                               cls.X_train,
#                                               cls.y_train,
#                                               cls.random_state,
#                                               cls.outputFileName,
#                                               cls.KFolds,
#                                               cls.nbCores,
#                                               cls.metrics,
#                                               cls.kwargs)
#         pass
#
#
# class Test_saveResults(unittest.TestCase):
#
#     @classmethod
#     def setUpClass(cls):
#         cls.stringAnalysis = "string analysis"
#         cls.outputFileName = "test_file"
#         cls.full_labels_pred = np.zeros(10)
#         cls.y_train_pred = np.ones(5)
#         cls.y_train = np.zeros(5)
#         cls.imagesAnalysis = {}
#
#     def test_simple(cls):
#         ExecClassifMonoView.saveResults(cls.stringAnalysis,
#                                         cls.outputFileName,
#                                         cls.full_labels_pred,
#                                         cls.y_train_pred,
#                                         cls.y_train,
#                                         cls.imagesAnalysis)
#         # Test if the files are created with the right content
#
#     def test_with_image_analysis(cls):
#         cls.imagesAnalysis = {"test_image": "image.png"}  # Image to gen
#         ExecClassifMonoView.saveResults(cls.stringAnalysis,
#                                         cls.outputFileName,
#                                         cls.full_labels_pred,
#                                         cls.y_train_pred,
#                                         cls.y_train,
#                                         cls.imagesAnalysis)
#         # Test if the files are created with the right content
#
import unittest

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

from multiview_platform.mono_multi_view_classifiers.monoview import monoview_utils
from multiview_platform.mono_multi_view_classifiers.utils.hyper_parameter_search import CustomRandint


class TestFunctions(unittest.TestCase):

    def test_gen_test_folds_preds(self):
        self.random_state = np.random.RandomState(42)
        self.X_train = self.random_state.random_sample((31, 10))
        self.y_train = np.ones(31, dtype=int)
        self.KFolds = StratifiedKFold(n_splits=3)
        self.estimator = DecisionTreeClassifier(max_depth=1)
        self.y_train[15:] = -1
        testFoldsPreds = monoview_utils.gen_test_folds_preds(self.X_train,
                                                             self.y_train,
                                                             self.KFolds,
                                                             self.estimator)
        self.assertEqual(testFoldsPreds.shape, (3, 10))
        np.testing.assert_array_equal(testFoldsPreds[0], np.array(
            [1, 1, -1, -1, 1, 1, -1, 1, -1, 1]))

    def test_change_label_to_minus(self):
        lab = monoview_utils.change_label_to_minus(np.array([0, 1, 0]))
        np.testing.assert_array_equal(lab, np.array([-1, 1, -1]))

    def test_change_label_to_zero(self):
        lab = monoview_utils.change_label_to_zero(np.array([-1, 1, -1]))
        np.testing.assert_array_equal(lab, np.array([0, 1, 0]))

    def test_compute_possible_combinations(self):
        n_possib = monoview_utils.compute_possible_combinations(
            {"a": [1, 2], "b": {"c": [2, 3]}, "d": CustomRandint(0, 10)})
        np.testing.assert_array_equal(n_possib, np.array([2, np.inf, 10]))
import os
import unittest

import h5py
import numpy as np
from sklearn.model_selection import StratifiedKFold

from multiview_platform.tests.utils import rm_tmp, tmp_path, test_dataset
from multiview_platform.mono_multi_view_classifiers.multiview import exec_multiview


class Test_init_constants(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        rm_tmp()
        os.mkdir(tmp_path)

    @classmethod
    def tearDownClass(cls):
        rm_tmp()

    def test_simple(self):
        classifier_name, t_start, views_indices, \
        classifier_config, views, learning_rate, labels, output_file_name, \
        directory, base_file_name, metrics = exec_multiview.init_constants(
            kwargs={"view_names": ["ViewN0", "ViewN2", "ViewN1", ],
                    "view_indices": [0, 2, 1],
                    "classifier_name": "test_clf",
                    "test_clf": {}},
            classification_indices=[np.array([0, 1, 4, 2]), np.array([3])],
            metrics={"accuracy_score*": {}},
            name="test_dataset",
            nb_cores=1,
            k_folds=StratifiedKFold(n_splits=2),
            dataset_var=test_dataset,
            directory=tmp_path
        )
        self.assertEqual(classifier_name, "test_clf")
        self.assertEqual(views_indices, [0, 2, 1])
        self.assertEqual(classifier_config, {})
        self.assertEqual(views, ["ViewN0", "ViewN2", "ViewN1", ])
        self.assertEqual(learning_rate, 4 / 5)

    def test_exec_multiview_no_hps(self):
        res = exec_multiview.exec_multiview(
            directory=tmp_path,
            dataset_var=test_dataset,
            name="test_dataset",
            classification_indices=[np.array([0, 1, 4, 2]), np.array([3])],
            k_folds=StratifiedKFold(n_splits=2),
            nb_cores=1,
            database_type="", path="",
            labels_dictionary={0: "yes", 1: "no"},
            random_state=np.random.RandomState(42),
            labels=test_dataset.get_labels(),
            hps_method="None",
            hps_kwargs={},
            metrics=None,
            n_iter=30,
            **{"view_names": ["ViewN0", "ViewN2", "ViewN1", ],
               "view_indices": [0, 2, 1],
               "classifier_name": "weighted_linear_early_fusion",
               "weighted_linear_early_fusion": {}}
        )

    def test_exec_multiview(self):
        res = exec_multiview.exec_multiview(
            directory=tmp_path,
            dataset_var=test_dataset,
            name="test_dataset",
            classification_indices=[np.array([0, 1, 4, 2]), np.array([3])],
            k_folds=StratifiedKFold(n_splits=2),
            nb_cores=1,
            database_type="", path="",
            labels_dictionary={0: "yes", 1: "no"},
            random_state=np.random.RandomState(42),
            labels=test_dataset.get_labels(),
            hps_method="Grid",
            hps_kwargs={"param_grid":
                            {"monoview_classifier_config": [{"max_depth": 3},
                                                            {"max_depth": 1}]},
                        },
            metrics=None,
            n_iter=30,
            **{"view_names": ["ViewN0", "ViewN2", "ViewN1", ],
               "view_indices": [0, 2, 1],
               "classifier_name": "weighted_linear_early_fusion",
               "weighted_linear_early_fusion": {}}
        )
[tool:pytest]
testpaths = multiview_platform
addopts = --cov-report=html
          --verbose
          --cov=multiview_platform
          --cov-report=term-missing
          --cov-config ../setup.cfg
          --cache-clear

[coverage:run]
source = multiview_platform
include = */mono_multi_view_classifiers/*
omit = */tests/*
       */examples/*
       */monoview_classifiers/*
       */multiview_classifiers/*
       */datasets/*
       *declare_classifier.py
       *make_file_config.py

[coverage:report]
exclude_lines = pragma: no cover
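With this configuration, running pytest from the directory containing it collects the tests under multiview_platform and, assuming the pytest-cov plugin is installed, produces both an HTML and a terminal coverage report restricted to the mono_multi_view_classifiers package (tests, examples, classifier implementations and datasets are omitted from coverage).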
File added