From d137b537816dc88b8b39af4ee5815a780dc26e4d Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Mon, 18 Mar 2019 14:11:41 -0400 Subject: [PATCH] Corrected --- .../Monoview/Additions/QarBoostUtils.py | 2 +- .../Monoview/Additions/_custom_criterion.pxd | 99 +++ .../Monoview/Additions/_custom_criterion.pyx | 582 ++++++++++++++++++ .../MonoviewClassifiers/CGreed.py | 2 +- .../MonoviewClassifiers/CQBoost.py | 3 +- .../ResultAnalysis.py | 7 +- .../utils/GetMultiviewDb.py | 8 +- .../utils/execution.py | 8 +- setup.py | 5 +- 9 files changed, 703 insertions(+), 13 deletions(-) create mode 100644 multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pxd create mode 100644 multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py index a57f145e..f9d7917d 100644 --- a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py @@ -187,7 +187,7 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost): """Is used at each iteration to compute and store all the needed quantities for later analysis""" self.example_weights_.append(self.example_weights) self.tau.append(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples)) - print(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples)) + # print(np.sum(np.multiply(self.previous_vote, self.new_voter))/float(self.n_total_examples)) self.previous_vote += self.q * self.new_voter self.norm.append(np.linalg.norm(self.previous_vote)**2) self.previous_votes.append(self.previous_vote) diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pxd b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pxd new file mode 100644 index 00000000..2272f6a4 --- /dev/null +++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pxd @@ -0,0 +1,99 @@ +# cython: language_level=3 + +# Authors: Gilles Louppe <g.louppe@gmail.com> +# Peter Prettenhofer <peter.prettenhofer@gmail.com> +# Arnaud Joly <arnaud.v.joly@gmail.com> +# Jacob Schreiber <jmschreiber91@gmail.com> +# Nelson Liu <nelson@nelsonliu.me> +# +# License: BSD 3 clause +# +# cython: language_level=3 + +# See _utils.pyx for details. + +import numpy as np +cimport numpy as np + + + +ctypedef np.npy_float32 DTYPE_t # Type of X +ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight +ctypedef np.npy_intp SIZE_t # Type for indices and counters +ctypedef np.npy_int32 INT32_t # Signed 32 bit integer +ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer + +cdef struct Node: + # Base storage structure for the nodes in a Tree object + + SIZE_t left_child # id of the left child of the node + SIZE_t right_child # id of the right child of the node + SIZE_t feature # Feature used for splitting the node + DOUBLE_t threshold # Threshold value at the node + DOUBLE_t impurity # Impurity of the node (i.e., the value of the criterion) + SIZE_t n_node_samples # Number of samples at the node + DOUBLE_t weighted_n_node_samples # Weighted number of samples at the node + +ctypedef fused realloc_ptr: + # Add pointer types here as needed. 
+    (DTYPE_t*)
+    (SIZE_t*)
+    (unsigned char*)
+    (WeightedPQueueRecord*)
+    (DOUBLE_t*)
+    (DOUBLE_t**)
+    (Node*)
+    (Cell*)
+    (Node**)
+    (StackRecord*)
+    (PriorityHeapRecord*)
+
+cdef struct Cell:
+    # Base storage structure for cells in a QuadTree object
+
+    # Tree structure
+    SIZE_t parent              # Parent cell of this cell
+    SIZE_t[8] children         # Array pointing to children of this cell
+
+    # Cell description
+    SIZE_t cell_id             # Id of the cell in the cells array in the Tree
+    SIZE_t point_index         # Index of the point at this cell (only defined
+                               # in non empty leaf)
+    bint is_leaf               # Does this cell have children?
+    DTYPE_t squared_max_width  # Squared value of the maximum width w
+    SIZE_t depth               # Depth of the cell in the tree
+    SIZE_t cumulative_size     # Number of points included in the subtree with
+                               # this cell as a root.
+
+    # Internal constants
+    DTYPE_t[3] center          # Store the center for quick split of cells
+    DTYPE_t[3] barycenter      # Keep track of the center of mass of the cell
+
+    # Cell boundaries
+    DTYPE_t[3] min_bounds      # Inferior boundaries of this cell (inclusive)
+    DTYPE_t[3] max_bounds      # Superior boundaries of this cell (exclusive)
+
+cdef struct WeightedPQueueRecord:
+    DOUBLE_t data
+    DOUBLE_t weight
+
+cdef struct StackRecord:
+    SIZE_t start
+    SIZE_t end
+    SIZE_t depth
+    SIZE_t parent
+    bint is_left
+    double impurity
+    SIZE_t n_constant_features
+
+cdef struct PriorityHeapRecord:
+    SIZE_t node_id
+    SIZE_t start
+    SIZE_t end
+    SIZE_t pos
+    SIZE_t depth
+    bint is_leaf
+    double impurity
+    double impurity_left
+    double impurity_right
+    double improvement
\ No newline at end of file
diff --git a/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx
new file mode 100644
index 00000000..349c696e
--- /dev/null
+++ b/multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx
@@ -0,0 +1,582 @@
+# cython: cdivision=True
+# cython: boundscheck=False
+# cython: wraparound=False
+
+# Authors: Gilles Louppe <g.louppe@gmail.com>
+#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
+#          Brian Holt <bdholt1@gmail.com>
+#          Noel Dawe <noel@dawe.me>
+#          Satrajit Gosh <satrajit.ghosh@gmail.com>
+#          Lars Buitinck
+#          Arnaud Joly <arnaud.v.joly@gmail.com>
+#          Joel Nothman <joel.nothman@gmail.com>
+#          Fares Hedayati <fares.hedayati@gmail.com>
+#          Jacob Schreiber <jmschreiber91@gmail.com>
+#          Nelson Liu <nelson@nelsonliu.me>
+#
+# License: BSD 3 clause
+
+from libc.stdlib cimport calloc
+from libc.stdlib cimport free
+from libc.string cimport memcpy
+from libc.string cimport memset
+from libc.math cimport fabs
+from libc.stdlib cimport malloc
+from libc.stdlib cimport realloc
+# from libc.math cimport log as ln
+
+import numpy as np
+cimport numpy as np
+np.import_array()
+# from sklearn.tree._criterion cimport Criterion, ClassificationCriterion
+
+cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *:
+    # sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython
+    # 0.20.1 to crash.
+ cdef size_t nbytes = nelems * sizeof(p[0][0]) + if nbytes / sizeof(p[0][0]) != nelems: + # Overflow in the multiplication + with gil: + raise MemoryError("could not allocate (%d * %d) bytes" + % (nelems, sizeof(p[0][0]))) + cdef realloc_ptr tmp = <realloc_ptr>realloc(p[0], nbytes) + if tmp == NULL: + with gil: + raise MemoryError("could not allocate %d bytes" % nbytes) + p[0] = tmp + return tmp # for + + +cdef inline np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size): + """Return copied data as 1D numpy array of intp's.""" + cdef np.npy_intp shape[1] + shape[0] = <np.npy_intp> size + return np.PyArray_SimpleNewFromData(1, shape, np.NPY_INTP, data).copy() + + +cdef class CustomCriterion: + """Interface for impurity criteria. + This object stores methods on how to calculate how good a split is using + different metrics. + """ + + def __dealloc__(self): + """Destructor.""" + + free(self.sum_total) + free(self.sum_left) + free(self.sum_right) + + def __getstate__(self): + return {} + + def __setstate__(self, d): + pass + + cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, DOUBLE_t* sample_weight, + double weighted_n_samples, SIZE_t* samples, SIZE_t start, + SIZE_t end) nogil except -1: + """Placeholder for a method which will initialize the criterion. + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. + Parameters + ---------- + y : array-like, dtype=DOUBLE_t + y is a buffer that can store values for n_outputs target variables + y_stride : SIZE_t + y_stride is used to index the kth output value as follows: + y[i, k] = y[i * y_stride + k] + sample_weight : array-like, dtype=DOUBLE_t + The weight of each sample + weighted_n_samples : DOUBLE_t + The total weight of the samples being considered + samples : array-like, dtype=DOUBLE_t + Indices of the samples in X and y, where samples[start:end] + correspond to the samples in this node + start : SIZE_t + The first sample to be used on this node + end : SIZE_t + The last sample used on this node + """ + + pass +# +# cdef int reset(self) nogil except -1: +# """Reset the criterion at pos=start. +# This method must be implemented by the subclass. +# """ +# +# pass +# +# cdef int reverse_reset(self) nogil except -1: +# """Reset the criterion at pos=end. +# This method must be implemented by the subclass. +# """ +# pass +# +# cdef int update(self, SIZE_t new_pos) nogil except -1: +# """Updated statistics by moving samples[pos:new_pos] to the left child. +# This updates the collected statistics by moving samples[pos:new_pos] +# from the right child to the left child. It must be implemented by +# the subclass. +# Parameters +# ---------- +# new_pos : SIZE_t +# New starting index position of the samples in the right child +# """ +# +# pass +# +# cdef double node_impurity(self) nogil: +# """Placeholder for calculating the impurity of the node. +# Placeholder for a method which will evaluate the impurity of +# the current node, i.e. the impurity of samples[start:end]. This is the +# primary function of the criterion class. +# """ +# +# pass +# +# cdef void children_impurity(self, double* impurity_left, +# double* impurity_right) nogil: +# """Placeholder for calculating the impurity of children. +# Placeholder for a method which evaluates the impurity in +# children nodes, i.e. the impurity of samples[start:pos] + the impurity +# of samples[pos:end]. +# Parameters +# ---------- +# impurity_left : double pointer +# The memory address where the impurity of the left child should be +# stored. 
+# impurity_right : double pointer +# The memory address where the impurity of the right child should be +# stored +# """ +# +# pass +# +# cdef void node_value(self, double* dest) nogil: +# """Placeholder for storing the node value. +# Placeholder for a method which will compute the node value +# of samples[start:end] and save the value into dest. +# Parameters +# ---------- +# dest : double pointer +# The memory address where the node value should be stored. +# """ +# +# pass +# +# cdef double proxy_impurity_improvement(self) nogil: +# """Compute a proxy of the impurity reduction +# This method is used to speed up the search for the best split. +# It is a proxy quantity such that the split that maximizes this value +# also maximizes the impurity improvement. It neglects all constant terms +# of the impurity decrease for a given split. +# The absolute impurity improvement is only computed by the +# impurity_improvement method once the best split has been found. +# """ +# cdef double impurity_left +# cdef double impurity_right +# self.children_impurity(&impurity_left, &impurity_right) +# +# return (- self.weighted_n_right * impurity_right +# - self.weighted_n_left * impurity_left) +# +# cdef double impurity_improvement(self, double impurity) nogil: +# """Compute the improvement in impurity +# This method computes the improvement in impurity when a split occurs. +# The weighted impurity improvement equation is the following: +# N_t / N * (impurity - N_t_R / N_t * right_impurity +# - N_t_L / N_t * left_impurity) +# where N is the total number of samples, N_t is the number of samples +# at the current node, N_t_L is the number of samples in the left child, +# and N_t_R is the number of samples in the right child, +# Parameters +# ---------- +# impurity : double +# The initial impurity of the node before the split +# Return +# ------ +# double : improvement in impurity after the split occurs +# """ +# +# cdef double impurity_left +# cdef double impurity_right +# +# self.children_impurity(&impurity_left, &impurity_right) +# +# return ((self.weighted_n_node_samples / self.weighted_n_samples) * +# (impurity - (self.weighted_n_right / +# self.weighted_n_node_samples * impurity_right) +# - (self.weighted_n_left / +# self.weighted_n_node_samples * impurity_left))) +# +# +# cdef class CustomClassificationCriterion(Criterion): +# """Abstract criterion for classification.""" +# +# def __cinit__(self, SIZE_t n_outputs, +# np.ndarray[SIZE_t, ndim=1] n_classes): +# """Initialize attributes for this criterion. 
+# Parameters +# ---------- +# n_outputs : SIZE_t +# The number of targets, the dimensionality of the prediction +# n_classes : numpy.ndarray, dtype=SIZE_t +# The number of unique classes in each target +# """ +# +# self.y = NULL +# self.y_stride = 0 +# self.sample_weight = NULL +# +# self.samples = NULL +# self.start = 0 +# self.pos = 0 +# self.end = 0 +# +# self.n_outputs = n_outputs +# self.n_samples = 0 +# self.n_node_samples = 0 +# self.weighted_n_node_samples = 0.0 +# self.weighted_n_left = 0.0 +# self.weighted_n_right = 0.0 +# +# # Count labels for each output +# self.sum_total = NULL +# self.sum_left = NULL +# self.sum_right = NULL +# self.n_classes = NULL +# +# safe_realloc(&self.n_classes, n_outputs) +# +# cdef SIZE_t k = 0 +# cdef SIZE_t sum_stride = 0 +# +# # For each target, set the number of unique classes in that target, +# # and also compute the maximal stride of all targets +# for k in range(n_outputs): +# self.n_classes[k] = n_classes[k] +# +# if n_classes[k] > sum_stride: +# sum_stride = n_classes[k] +# +# self.sum_stride = sum_stride +# +# cdef SIZE_t n_elements = n_outputs * sum_stride +# self.sum_total = <double*> calloc(n_elements, sizeof(double)) +# self.sum_left = <double*> calloc(n_elements, sizeof(double)) +# self.sum_right = <double*> calloc(n_elements, sizeof(double)) +# +# if (self.sum_total == NULL or +# self.sum_left == NULL or +# self.sum_right == NULL): +# raise MemoryError() +# +# def __dealloc__(self): +# """Destructor.""" +# free(self.n_classes) +# +# def __reduce__(self): +# return (type(self), +# (self.n_outputs, +# sizet_ptr_to_ndarray(self.n_classes, self.n_outputs)), +# self.__getstate__()) +# +# cdef int init(self, DOUBLE_t* y, SIZE_t y_stride, +# DOUBLE_t* sample_weight, double weighted_n_samples, +# SIZE_t* samples, SIZE_t start, SIZE_t end) nogil except -1: +# """Initialize the criterion at node samples[start:end] and +# children samples[start:start] and samples[start:end]. +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. 
+# Parameters +# ---------- +# y : array-like, dtype=DOUBLE_t +# The target stored as a buffer for memory efficiency +# y_stride : SIZE_t +# The stride between elements in the buffer, important if there +# are multiple targets (multi-output) +# sample_weight : array-like, dtype=DTYPE_t +# The weight of each sample +# weighted_n_samples : SIZE_t +# The total weight of all samples +# samples : array-like, dtype=SIZE_t +# A mask on the samples, showing which ones we want to use +# start : SIZE_t +# The first sample to use in the mask +# end : SIZE_t +# The last sample to use in the mask +# """ +# +# self.y = y +# self.y_stride = y_stride +# self.sample_weight = sample_weight +# self.samples = samples +# self.start = start +# self.end = end +# self.n_node_samples = end - start +# self.weighted_n_samples = weighted_n_samples +# self.weighted_n_node_samples = 0.0 +# +# cdef SIZE_t* n_classes = self.n_classes +# cdef double* sum_total = self.sum_total +# +# cdef SIZE_t i +# cdef SIZE_t p +# cdef SIZE_t k +# cdef SIZE_t c +# cdef DOUBLE_t w = 1.0 +# cdef SIZE_t offset = 0 +# +# for k in range(self.n_outputs): +# memset(sum_total + offset, 0, n_classes[k] * sizeof(double)) +# offset += self.sum_stride +# +# for p in range(start, end): +# i = samples[p] +# +# # w is originally set to be 1.0, meaning that if no sample weights +# # are given, the default weight of each sample is 1.0 +# if sample_weight != NULL: +# w = sample_weight[i] +# +# # Count weighted class frequency for each target +# for k in range(self.n_outputs): +# c = <SIZE_t> y[i * y_stride + k] +# sum_total[k * self.sum_stride + c] += w +# +# self.weighted_n_node_samples += w +# +# # Reset to pos=start +# self.reset() +# return 0 +# +# cdef int reset(self) nogil except -1: +# """Reset the criterion at pos=start +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. +# """ +# self.pos = self.start +# +# self.weighted_n_left = 0.0 +# self.weighted_n_right = self.weighted_n_node_samples +# +# cdef double* sum_total = self.sum_total +# cdef double* sum_left = self.sum_left +# cdef double* sum_right = self.sum_right +# +# cdef SIZE_t* n_classes = self.n_classes +# cdef SIZE_t k +# +# for k in range(self.n_outputs): +# memset(sum_left, 0, n_classes[k] * sizeof(double)) +# memcpy(sum_right, sum_total, n_classes[k] * sizeof(double)) +# +# sum_total += self.sum_stride +# sum_left += self.sum_stride +# sum_right += self.sum_stride +# return 0 +# +# cdef int reverse_reset(self) nogil except -1: +# """Reset the criterion at pos=end +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. +# """ +# self.pos = self.end +# +# self.weighted_n_left = self.weighted_n_node_samples +# self.weighted_n_right = 0.0 +# +# cdef double* sum_total = self.sum_total +# cdef double* sum_left = self.sum_left +# cdef double* sum_right = self.sum_right +# +# cdef SIZE_t* n_classes = self.n_classes +# cdef SIZE_t k +# +# for k in range(self.n_outputs): +# memset(sum_right, 0, n_classes[k] * sizeof(double)) +# memcpy(sum_left, sum_total, n_classes[k] * sizeof(double)) +# +# sum_total += self.sum_stride +# sum_left += self.sum_stride +# sum_right += self.sum_stride +# return 0 +# +# cdef int update(self, SIZE_t new_pos) nogil except -1: +# """Updated statistics by moving samples[pos:new_pos] to the left child. +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. 
+# Parameters +# ---------- +# new_pos : SIZE_t +# The new ending position for which to move samples from the right +# child to the left child. +# """ +# cdef DOUBLE_t* y = self.y +# cdef SIZE_t pos = self.pos +# cdef SIZE_t end = self.end +# +# cdef double* sum_left = self.sum_left +# cdef double* sum_right = self.sum_right +# cdef double* sum_total = self.sum_total +# +# cdef SIZE_t* n_classes = self.n_classes +# cdef SIZE_t* samples = self.samples +# cdef DOUBLE_t* sample_weight = self.sample_weight +# +# cdef SIZE_t i +# cdef SIZE_t p +# cdef SIZE_t k +# cdef SIZE_t c +# cdef SIZE_t label_index +# cdef DOUBLE_t w = 1.0 +# +# # Update statistics up to new_pos +# # +# # Given that +# # sum_left[x] + sum_right[x] = sum_total[x] +# # and that sum_total is known, we are going to update +# # sum_left from the direction that require the least amount +# # of computations, i.e. from pos to new_pos or from end to new_po. +# +# if (new_pos - pos) <= (end - new_pos): +# for p in range(pos, new_pos): +# i = samples[p] +# +# if sample_weight != NULL: +# w = sample_weight[i] +# +# for k in range(self.n_outputs): +# label_index = (k * self.sum_stride + +# <SIZE_t> y[i * self.y_stride + k]) +# sum_left[label_index] += w +# +# self.weighted_n_left += w +# +# else: +# self.reverse_reset() +# +# for p in range(end - 1, new_pos - 1, -1): +# i = samples[p] +# +# if sample_weight != NULL: +# w = sample_weight[i] +# +# for k in range(self.n_outputs): +# label_index = (k * self.sum_stride + +# <SIZE_t> y[i * self.y_stride + k]) +# sum_left[label_index] -= w +# +# self.weighted_n_left -= w +# +# # Update right part statistics +# self.weighted_n_right = self.weighted_n_node_samples - self.weighted_n_left +# for k in range(self.n_outputs): +# for c in range(n_classes[k]): +# sum_right[c] = sum_total[c] - sum_left[c] +# +# sum_right += self.sum_stride +# sum_left += self.sum_stride +# sum_total += self.sum_stride +# +# self.pos = new_pos +# return 0 +# +# cdef double node_impurity(self) nogil: +# pass +# +# cdef void children_impurity(self, double* impurity_left, +# double* impurity_right) nogil: +# pass +# +# cdef void node_value(self, double* dest) nogil: +# """Compute the node value of samples[start:end] and save it into dest. +# Parameters +# ---------- +# dest : double pointer +# The memory address which we will save the node value into. +# """ +# +# cdef double* sum_total = self.sum_total +# cdef SIZE_t* n_classes = self.n_classes +# cdef SIZE_t k +# +# for k in range(self.n_outputs): +# memcpy(dest, sum_total, n_classes[k] * sizeof(double)) +# dest += self.sum_stride +# sum_total += self.sum_stride +# +# cdef class CCriterion(CustomClassificationCriterion): +# r"""Cross Entropy impurity criterion. +# This handles cases where the target is a classification taking values +# 0, 1, ... K-2, K-1. If node m represents a region Rm with Nm observations, +# then let +# count_k = 1 / Nm \sum_{x_i in Rm} I(yi = k) +# be the proportion of class k observations in node m. +# The cross-entropy is then defined as +# cross-entropy = -\sum_{k=0}^{K-1} count_k log(count_k) +# """ +# +# cdef double node_impurity(self) nogil: +# """Evaluate the impurity of the current node, i.e. 
the impurity of +# samples[start:end], using the cross-entropy criterion.""" +# +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef double* sum_total = self.sum_total +# # cdef double entropy = 0.0 +# # cdef double count_k +# # cdef SIZE_t k +# # cdef SIZE_t c +# # +# # for k in range(self.n_outputs): +# # for c in range(n_classes[k]): +# # count_k = sum_total[c] +# # if count_k > 0.0: +# # count_k /= self.weighted_n_node_samples +# # entropy -= count_k * log(count_k) +# # +# # sum_total += self.sum_stride +# +# return 1.0 +# +# cdef void children_impurity(self, double* impurity_left, +# double* impurity_right) nogil: +# """Evaluate the impurity in children nodes +# i.e. the impurity of the left child (samples[start:pos]) and the +# impurity the right child (samples[pos:end]). +# Parameters +# ---------- +# impurity_left : double pointer +# The memory address to save the impurity of the left node +# impurity_right : double pointer +# The memory address to save the impurity of the right node +# """ +# +# # cdef SIZE_t* n_classes = self.n_classes +# # cdef double* sum_left = self.sum_left +# # cdef double* sum_right = self.sum_right +# # cdef double entropy_left = 0.0 +# # cdef double entropy_right = 0.0 +# # cdef double count_k +# # cdef SIZE_t k +# # cdef SIZE_t c +# # +# # for k in range(self.n_outputs): +# # for c in range(n_classes[k]): +# # count_k = sum_left[c] +# # if count_k > 0.0: +# # count_k /= self.weighted_n_left +# # entropy_left -= count_k * log(count_k) +# # +# # count_k = sum_right[c] +# # if count_k > 0.0: +# # count_k /= self.weighted_n_right +# # entropy_right -= count_k * log(count_k) +# # +# # sum_left += self.sum_stride +# # sum_right += self.sum_stride +# # +# # impurity_left[0] = entropy_left / self.n_outputs +# # impurity_right[0] = entropy_right / self.n_outputs \ No newline at end of file diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py index bb0a6ca0..351698c2 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CGreed.py @@ -5,7 +5,7 @@ from ..Monoview.Additions.QarBoostUtils import ColumnGenerationClassifierQar class CGreed(ColumnGenerationClassifierQar, BaseMonoviewClassifier): - def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=1, **kwargs): + def __init__(self, random_state=None, n_max_iterations=500, n_stumps_per_attribute=10, **kwargs): super(CGreed, self).__init__(n_max_iterations=n_max_iterations, random_state=random_state, self_complemented=True, diff --git a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py index 9556dd1c..19297fc3 100644 --- a/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py +++ b/multiview_platform/MonoMultiViewClassifiers/MonoviewClassifiers/CQBoost.py @@ -55,7 +55,8 @@ class CQBoost(ColumnGenerationClassifier, BaseMonoviewClassifier): def formatCmdArgs(args): """Used to format kwargs for the parsed args""" kwargsDict = {"mu": args.CQB_mu, - "epsilon": args.CQB_epsilon} + "epsilon": args.CQB_epsilon, + "n_stumps":args.CQB_stumps} return kwargsDict diff --git a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py index c12d0b60..9ef17e0d 100644 --- 
a/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py
+++ b/multiview_platform/MonoMultiViewClassifiers/ResultAnalysis.py
@@ -241,8 +241,11 @@ def plotMetricScores(trainScores, testScores, names, nbResults, metricName, file
         plt.tight_layout()
     except:
         pass
-    f.savefig(fileName)
+    f.savefig(fileName+'.png')
     plt.close()
+    import pandas as pd
+    dataframe = pd.DataFrame(np.transpose(np.concatenate((trainScores.reshape((trainScores.shape[0], 1)), testScores.reshape((testScores.shape[0], 1))), axis=1)), columns=names)
+    dataframe.to_csv(fileName+".csv")
 
 
 def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
@@ -268,7 +271,7 @@ def publishMetricsGraphs(metricsScores, directory, databaseName, labelsNames):
 
         nbResults = len(metricScores["testScores"])
 
-        fileName = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName +"-"+"_vs_".join(labelsNames)+ "-" + metricName + ".png"
+        fileName = directory + time.strftime("%Y_%m_%d-%H_%M_%S") + "-" + databaseName +"-"+"_vs_".join(labelsNames)+ "-" + metricName
 
         plotMetricScores(np.array(metricScores["trainScores"]), np.array(metricScores["testScores"]),
                          np.array(metricScores["classifiersNames"]), nbResults, metricName, fileName,
                          tag=" "+" vs ".join(labelsNames))
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
index 1143c2ea..dafb99d0 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/GetMultiviewDb.py
@@ -343,16 +343,16 @@ def add_gaussian_noise(dataset_file, random_state, path_f, dataset_name, noise_s
     for view_index in range(noisy_dataset.get("Metadata").attrs["nbView"]):
         view_name = "View" + str(view_index)
         view_dset = noisy_dataset.get(view_name)
-        orig_shape = view_dset.value.shape
-        view_ranges = view_dset.attrs["ranges"]
-        view_limits = view_dset.attrs["limits"]
+        # orig_shape = view_dset.value.shape
+        view_limits = dataset_file["Metadata/View"+str(view_index)+"_limits"].value
+        view_ranges = view_limits[:,1]-view_limits[:,0]
         normal_dist = random_state.normal(0, noise_std, view_dset.value.shape)
         noise = normal_dist*view_ranges
         noised_data = view_dset.value+noise
         noised_data = np.where(noised_data<view_limits[:,0], view_limits[:,0], noised_data)
         noised_data = np.where(noised_data>view_limits[:,1], view_limits[:,1], noised_data)
         noisy_dataset[view_name][...] = noised_data
-        final_shape = noised_data.shape
+        # final_shape = noised_data.shape
     return noisy_dataset, dataset_name+"_noised"
 
diff --git a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
index a22ccb90..9f3b4939 100644
--- a/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
+++ b/multiview_platform/MonoMultiViewClassifiers/utils/execution.py
@@ -161,6 +161,10 @@ def parseTheArgs(arguments):
                               help='Set the mu parameter for CQBoost', default=0.001)
     groupCQBoost.add_argument('--CQB_epsilon', metavar='FLOAT', type=float, action='store',
                               help='Set the epsilon parameter for CQBoost', default=1e-08)
+    groupCQBoost.add_argument('--CQB_stumps', metavar='INT', type=int,
+                              action='store',
+                              help='Set the number of stumps for CQBoost',
+                              default=10)
 
     groupCQBoostv2 = parser.add_argument_group('CQBoostv2 arguments')
     groupCQBoostv2.add_argument('--CQB2_mu', metavar='FLOAT', type=float, action='store',
@@ -182,7 +186,7 @@ def parseTheArgs(arguments):
     groupCGreed = parser.add_argument_group('CGreed arguments')
     groupCGreed.add_argument('--CGR_stumps', metavar='INT', type=int,
                              action='store',
-                             help='Set the n_stumps_per_attribute parameter for CGreed', default=1)
+                             help='Set the n_stumps_per_attribute parameter for CGreed', default=10)
     groupCGreed.add_argument('--CGR_n_iter', metavar='INT', type=int,
                              action='store',
                              help='Set the n_max_iterations parameter for CGreed', default=100)
@@ -190,7 +194,7 @@
     groupCGDesc.add_argument('--CGD_stumps', metavar='INT', type=int,
                              action='store',
                              help='Set the n_stumps_per_attribute parameter for CGDesc',
-                             default=1)
+                             default=10)
     groupCGDesc.add_argument('--CGD_n_iter', metavar='INT', type=int,
                              action='store',
                              help='Set the n_max_iterations parameter for CGDesc',
diff --git a/setup.py b/setup.py
index 36266ae6..388d42da 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from setuptools import setup, find_packages
+from Cython.Build import cythonize
 
 import multiview_platform
 
@@ -27,7 +28,7 @@ setup(
 
     # Your email address, knowing that it will be publicly visible, with all
    # the risks that implies.
-    author_email="baptiste.bauvin.1@ulaval.ca",
+    author_email="baptiste.bauvin@lis-lab.fr",
 
     # A short description
     description="Plateforme de test d'algorithmes multivues et monovue",
@@ -87,5 +88,5 @@ setup(
 
     # There are still plenty of other possible parameters, but these cover
     # 90% of the needs
-
+    ext_modules=cythonize("multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/_custom_criterion.pyx"),
 )
\ No newline at end of file
-- 
GitLab
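
The commented-out impurity_improvement docstring in _custom_criterion.pyx states the weighted impurity improvement in plain text. Written out, with N the total number of samples, N_t the samples at the current node, and N_t_L, N_t_R the samples in the left and right children, the quantity is:

    \Delta I = \frac{N_t}{N}\left( I - \frac{N_{t_R}}{N_t}\, I_{\mathrm{right}} - \frac{N_{t_L}}{N_t}\, I_{\mathrm{left}} \right)

proxy_impurity_improvement drops the terms that are constant across the candidate splits of a node and ranks splits by the weighted sum -N_{t_R} I_{right} - N_{t_L} I_{left} alone, which is why it is cheaper to evaluate during the split search.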
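
As shipped, CCriterion.node_impurity returns a constant 1.0 because its body is commented out; the commented lines compute the cross-entropy -\sum_k count_k log(count_k) per output. A minimal NumPy sketch of that computation for a single output, with illustrative names that are not part of the patch:

    import numpy as np

    def cross_entropy_impurity(class_counts, weighted_n_node_samples):
        # -sum_k count_k * log(count_k), skipping empty classes (one output).
        entropy = 0.0
        for count_k in class_counts:
            if count_k > 0.0:
                count_k /= weighted_n_node_samples
                entropy -= count_k * np.log(count_k)
        return entropy

    # A node holding 3 samples of class 0 and 1 sample of class 1:
    print(cross_entropy_impurity(np.array([3.0, 1.0]), 4.0))  # ~0.562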
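
The new CSV export in plotMetricScores writes a 2 x n_classifiers table: row 0 holds the train scores, row 1 the test scores, one column per classifier name. A self-contained sketch of the same layout, using toy classifier names and scores rather than values from the platform:

    import numpy as np
    import pandas as pd

    names = np.array(["DecisionTree", "SVM", "Adaboost"])  # toy names
    trainScores = np.array([0.90, 0.80, 0.95])
    testScores = np.array([0.70, 0.75, 0.80])

    # Stack the two score vectors as columns, then transpose so that
    # row 0 = train, row 1 = test, one column per classifier.
    dataframe = pd.DataFrame(
        np.transpose(np.concatenate((trainScores.reshape((-1, 1)),
                                     testScores.reshape((-1, 1))), axis=1)),
        columns=names)
    dataframe.to_csv("accuracy_score.csv")  # illustrative file name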
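
add_gaussian_noise now derives each view's per-feature ranges from the stored limits, scales unit Gaussian noise by those ranges, and clips the noised values back into [min, max]. A standalone NumPy sketch of the scheme, with made-up stand-ins for the HDF5 view data and limits datasets:

    import numpy as np

    random_state = np.random.RandomState(42)
    view_data = random_state.uniform(0, 10, size=(5, 3))  # stand-in for view_dset.value
    view_limits = np.array([[0., 10.]] * 3)               # per-feature [min, max]
    view_ranges = view_limits[:, 1] - view_limits[:, 0]

    # Noise standard deviation is expressed as a fraction of each feature's range.
    noise = random_state.normal(0, 0.15, view_data.shape) * view_ranges
    noised_data = view_data + noise
    noised_data = np.where(noised_data < view_limits[:, 0], view_limits[:, 0], noised_data)
    noised_data = np.where(noised_data > view_limits[:, 1], view_limits[:, 1], noised_data)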