diff --git a/config_files/config_cuisine.yml b/config_files/config_cuisine.yml index c0ea46e133ad1fa6ca9955a151d305089f5a46a7..2ff90a34626f0fb22bc7adb2c382b6b15d6eed95 100644 --- a/config_files/config_cuisine.yml +++ b/config_files/config_cuisine.yml @@ -1,10 +1,10 @@ # The base configuration of the benchmark log: True -name: ["digits"] +name: ["lives_14view_EMF"] label: "_" file_type: ".hdf5" views: -pathf: "/home/baptiste/Documents/Datasets/Digits/" +pathf: "/home/baptiste/Documents/Datasets/Alexis/data/" nice: 0 random_state: 42 nb_cores: 1 @@ -20,15 +20,15 @@ multiclass_method: "oneVersusOne" split: 0.75 nb_folds: 5 nb_class: 2 -classes: -type: ["multiview",] -algos_monoview: ["group_scm",] +classes: ["multi_clustered", "EMF"] +type: ["multiview","monoview"] +algos_monoview: ["scm_mazid", "decision_tree"] algos_multiview: ["group_scm"] stats_iter: 2 metrics: accuracy_score: {} f1_score: - average: 'micro' + average: 'binary' metric_princ: "accuracy_score" hps_type: "None" hps_args: {} \ No newline at end of file diff --git a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/group_scm.py b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/group_scm.py index aa7a3e854f1d570c40766bbd61b458cb9ae73fd7..6d8fef7e60fd203d2e7d951a8fbe67bd4ca000a1 100644 --- a/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/group_scm.py +++ b/multiview_platform/mono_multi_view_classifiers/multiview_classifiers/group_scm.py @@ -5,6 +5,7 @@ import logging import numpy as np from six import iteritems +import pickle from pyscm._scm_utility import find_max as find_max_utility # cpp extensions from pyscm.model import ConjunctionModel, DisjunctionModel from pyscm.rules import DecisionStump @@ -49,7 +50,7 @@ class GroupSetCoveringMachineClassifier(BaseSetCoveringMachine): """ self.features_to_index = features_to_index self.prior_rules = np.asarray(prior_rules) - self.groups = groups + self.groups = pickle.load(open('/home/baptiste/Documents/Clouded/short_projects/pickles_mazid/pathways_multiview_groups.pck', 'rb')) self.tiebreaker = tiebreaker self.update_method = update_method self.groups_rules = [] # GR @@ -261,7 +262,7 @@ class GroupSCM(BaseMultiviewClassifier): CV, gridsearch, and so on ... """ - def __init__(self, features_to_index=None, prior_rules=None, update_method=None, + def __init__(self, features_to_index=None, prior_rules=None, update_method='inner_group', groups=None, tiebreaker='', model_type='conjunction', p=0.1, max_rules=10, random_state=42): super(GroupSCM, self).__init__(random_state) @@ -269,7 +270,7 @@ class GroupSCM(BaseMultiviewClassifier): self.p = p self.max_rules = max_rules self.random_state = random_state - self.features_to_index = features_to_index + self.features_to_index = dict((i, "feature_{}".format(i)) for i in range(37325)) self.prior_rules = prior_rules self.groups = groups self.tiebreaker = tiebreaker @@ -303,15 +304,40 @@ class GroupSCM(BaseMultiviewClassifier): , axis=1) return monoview_data + def f_1(self,c , x): + """Compute an update function""" + return np.exp(-c * x) def fit(self, X, y, train_indices=None, view_indices=None): train_indices, X = self.transform_data_to_monoview(X, train_indices, view_indices) + dict_pr_group = pickle.load(open("/home/baptiste/Documents/Clouded/short_projects/pickles_mazid/multiview_pathways_dict.pck", 'rb')) + dict_pr_rules = pickle.load(open("/home/baptiste/Documents/Clouded/short_projects/pickles_mazid/pathways_multiview_groups.pck", 'rb')) + c=0.1 + inverse_prior_group=False + # Build PriorGroups vector, p_g + prior_values_dict_pr_group = {k: self.f_1(c, len(v)) for k, v in + dict_pr_group.items()} + for k, v in prior_values_dict_pr_group.items(): + if v == 0.0: + prior_values_dict_pr_group[k] = 1e-10 + + # Build PriorRules vector, p_ri + if inverse_prior_group: + prior_values_dict_pr_rules = { + k: self.f_1(c, 1 / prior_values_dict_pr_group[v]) for k, v in + dict_pr_rules.items()} + else: + prior_values_dict_pr_rules = { + k: self.f_1(c, prior_values_dict_pr_group[v]) for k, v in + dict_pr_rules.items()} + prior_rules = [prior_values_dict_pr_rules[name] for name in + self.features_to_index.values()] self.clf = GroupSetCoveringMachineClassifier( features_to_index=self.features_to_index, - prior_rules=self.prior_rules, + prior_rules=prior_rules, update_method=self.update_method, groups=self.groups, tiebreaker=self.tiebreaker,