Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Supervised MultiModal Integration Tool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Baptiste Bauvin
Supervised MultiModal Integration Tool
Commits
aed9da3b
Commit
aed9da3b
authored
Dec 4, 2018
by
Baptiste Bauvin
Browse files
Options
Downloads
Patches
Plain Diff
Reformatted code
parent
77fc6fe0
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+95
-59
95 additions, 59 deletions
...oMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
with
95 additions
and
59 deletions
multiview_platform/MonoMultiViewClassifiers/Monoview/Additions/QarBoostUtils.py
+
95
−
59
View file @
aed9da3b
...
...
@@ -9,7 +9,8 @@ from sklearn.base import BaseEstimator, ClassifierMixin
import
time
import
matplotlib.pyplot
as
plt
from
.BoostUtils
import
StumpsClassifiersGenerator
,
sign
,
BaseBoost
,
getInterpretBase
,
get_accuracy_graph
from
.BoostUtils
import
StumpsClassifiersGenerator
,
sign
,
BaseBoost
,
\
getInterpretBase
,
get_accuracy_graph
from
...
import
Metrics
...
...
@@ -59,7 +60,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if
n_stumps_per_attribute
:
self
.
n_stumps
=
n_stumps_per_attribute
self
.
use_r
=
use_r
self
.
printed_args_name_list
=
[
"
n_max_iterations
"
,
"
self_complemented
"
,
"
twice_the_same
"
,
self
.
printed_args_name_list
=
[
"
n_max_iterations
"
,
"
self_complemented
"
,
"
twice_the_same
"
,
"
c_bound_choice
"
,
"
random_start
"
,
"
n_stumps
"
,
"
use_r
"
]
...
...
@@ -87,12 +89,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self
.
init_boosting
(
m
,
formatted_y
,
y_kernel_matrix
)
self
.
break_cause
=
"
the maximum number of iterations was attained.
"
for
k
in
range
(
min
(
n
-
1
,
self
.
n_max_iterations
-
1
if
self
.
n_max_iterations
is
not
None
else
np
.
inf
)):
for
k
in
range
(
min
(
n
-
1
,
self
.
n_max_iterations
-
1
if
self
.
n_max_iterations
is
not
None
else
np
.
inf
)):
# Print dynamically the step and the error of the current classifier
print
(
"
Resp. bound : {}, {}/{}, eps :{}
"
.
format
(
self
.
respected_bound
,
k
+
2
,
self
.
n_max_iterations
,
self
.
voter_perfs
[
-
1
]),
end
=
"
\r
"
)
sol
,
new_voter_index
=
self
.
choose_new_voter
(
y_kernel_matrix
,
formatted_y
)
print
(
"
Resp. bound : {}, {}/{}, eps :{}
"
.
format
(
self
.
respected_bound
,
k
+
2
,
self
.
n_max_iterations
,
self
.
voter_perfs
[
-
1
]),
end
=
"
\r
"
)
sol
,
new_voter_index
=
self
.
choose_new_voter
(
y_kernel_matrix
,
formatted_y
)
if
type
(
sol
)
==
str
:
self
.
break_cause
=
new_voter_index
#
...
...
@@ -108,9 +117,9 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self
.
update_info_containers
(
formatted_y
,
voter_perf
,
k
)
self
.
nb_opposed_voters
=
self
.
check_opposed_voters
()
self
.
estimators_generator
.
estimators_
=
self
.
estimators_generator
.
estimators_
[
self
.
chosen_columns_
]
self
.
estimators_generator
.
estimators_
=
\
self
.
estimators_generator
.
estimators_
[
self
.
chosen_columns_
]
self
.
weights_
=
np
.
array
(
self
.
weights_
)
self
.
weights_
/=
np
.
sum
(
self
.
weights_
)
...
...
@@ -129,7 +138,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
logging
.
warning
(
'
Converting sparse matrix to dense matrix.
'
)
X
=
np
.
array
(
X
.
todense
())
classification_matrix
=
self
.
_binary_classification_matrix
(
X
)
margins
=
np
.
squeeze
(
np
.
asarray
(
np
.
matmul
(
classification_matrix
,
self
.
weights_
)))
margins
=
np
.
squeeze
(
np
.
asarray
(
np
.
matmul
(
classification_matrix
,
self
.
weights_
)))
signs_array
=
np
.
array
([
int
(
x
)
for
x
in
sign
(
margins
)])
signs_array
[
signs_array
==
-
1
]
=
0
end
=
time
.
time
()
...
...
@@ -141,7 +151,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
self
.
example_weights_
.
append
(
self
.
example_weights
)
self
.
previous_vote
=
np
.
matmul
(
self
.
classification_matrix
[:,
self
.
chosen_columns_
],
np
.
array
(
self
.
weights_
).
reshape
((
k
+
2
,
1
))).
reshape
((
self
.
n_total_examples
,
1
))
np
.
array
(
self
.
weights_
).
reshape
((
k
+
2
,
1
))).
reshape
(
(
self
.
n_total_examples
,
1
))
self
.
previous_votes
.
append
(
self
.
previous_vote
)
self
.
previous_margins
.
append
(
...
...
@@ -150,7 +161,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if
self
.
use_r
:
bound
=
self
.
bounds
[
-
1
]
*
math
.
sqrt
(
1
-
voter_perf
**
2
)
else
:
bound
=
np
.
prod
(
np
.
sqrt
(
1
-
4
*
np
.
square
(
0.5
-
np
.
array
(
self
.
voter_perfs
))))
bound
=
np
.
prod
(
np
.
sqrt
(
1
-
4
*
np
.
square
(
0.5
-
np
.
array
(
self
.
voter_perfs
))))
if
train_metric
>
bound
:
self
.
respected_bound
=
False
...
...
@@ -193,7 +205,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
y_kernel_matrix
)
return
sol
,
new_voter_index
def
init_boosting
(
self
,
m
,
y
,
y_kernel_matrix
):
"""
This initialization corresponds to the first round of boosting with equal weights for each example and the voter chosen by its margin.
"""
self
.
example_weights
=
self
.
_initialize_alphas
(
m
).
reshape
((
m
,
1
))
...
...
@@ -220,7 +231,6 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
epsilon
=
self
.
_compute_epsilon
(
y
)
self
.
voter_perfs
.
append
(
epsilon
)
if
self
.
use_r
:
self
.
q
=
0.5
*
math
.
log
((
1
+
r
)
/
(
1
-
r
))
else
:
...
...
@@ -260,7 +270,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def
init_hypotheses
(
self
,
X
,
y
):
"""
Initialization for the hypotheses used to build the boosted vote
"""
if
self
.
estimators_generator
is
None
:
self
.
estimators_generator
=
StumpsClassifiersGenerator
(
n_stumps_per_attribute
=
self
.
n_stumps
,
self
.
estimators_generator
=
StumpsClassifiersGenerator
(
n_stumps_per_attribute
=
self
.
n_stumps
,
self_complemented
=
self
.
self_complemented
)
self
.
estimators_generator
.
fit
(
X
,
y
)
self
.
classification_matrix
=
self
.
_binary_classification_matrix
(
X
)
...
...
@@ -286,28 +297,38 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def
_compute_epsilon
(
self
,
y
):
"""
Updating the error variable, the old fashioned way uses the whole majority vote to update the error
"""
ones_matrix
=
np
.
zeros
(
y
.
shape
)
ones_matrix
[
np
.
multiply
(
y
,
self
.
new_voter
.
reshape
(
y
.
shape
))
<
0
]
=
1
# can np.divide if needed
ones_matrix
[
np
.
multiply
(
y
,
self
.
new_voter
.
reshape
(
y
.
shape
))
<
0
]
=
1
# can np.divide if needed
epsilon
=
np
.
average
(
ones_matrix
,
weights
=
self
.
example_weights
,
axis
=
0
)
return
epsilon
def
_compute_r
(
self
,
y
):
ones_matrix
=
np
.
ones
(
y
.
shape
)
ones_matrix
[
np
.
multiply
(
y
,
self
.
new_voter
.
reshape
(
y
.
shape
))
<
0
]
=
-
1
# can np.divide if needed
ones_matrix
[
np
.
multiply
(
y
,
self
.
new_voter
.
reshape
(
y
.
shape
))
<
0
]
=
-
1
# can np.divide if needed
r
=
np
.
average
(
ones_matrix
,
weights
=
self
.
example_weights
,
axis
=
0
)
return
r
def
update_example_weights
(
self
,
y
):
"""
Old fashioned example weights update uses the whole majority vote, the other way uses only the last voter.
"""
new_weights
=
self
.
example_weights
.
reshape
((
self
.
n_total_examples
,
1
))
*
np
.
exp
(
-
self
.
q
*
np
.
multiply
(
y
,
self
.
new_voter
))
new_weights
=
self
.
example_weights
.
reshape
(
(
self
.
n_total_examples
,
1
))
*
np
.
exp
(
-
self
.
q
*
np
.
multiply
(
y
,
self
.
new_voter
))
self
.
example_weights
=
new_weights
/
np
.
sum
(
new_weights
)
def
_find_best_weighted_margin
(
self
,
y_kernel_matrix
,
upper_bound
=
1.0
,
lower_bound
=
0.0
):
def
_find_best_weighted_margin
(
self
,
y_kernel_matrix
,
upper_bound
=
1.0
,
lower_bound
=
0.0
):
"""
Finds the new voter by choosing the one that has the best weighted margin between 0.5 and 0.55
to avoid overly good voters that would get all the vote weights
"""
weighted_kernel_matrix
=
np
.
multiply
(
y_kernel_matrix
,
self
.
example_weights
.
reshape
((
self
.
n_total_examples
,
1
)))
pseudo_h_values
=
ma
.
array
(
np
.
sum
(
weighted_kernel_matrix
,
axis
=
0
),
fill_value
=-
np
.
inf
)
weighted_kernel_matrix
=
np
.
multiply
(
y_kernel_matrix
,
self
.
example_weights
.
reshape
(
(
self
.
n_total_examples
,
1
)))
pseudo_h_values
=
ma
.
array
(
np
.
sum
(
weighted_kernel_matrix
,
axis
=
0
),
fill_value
=-
np
.
inf
)
pseudo_h_values
[
self
.
chosen_columns_
]
=
ma
.
masked
acceptable_indices
=
np
.
where
(
np
.
logical_and
(
np
.
greater
(
upper_bound
,
pseudo_h_values
),
np
.
greater
(
pseudo_h_values
,
lower_bound
)))[
0
]
acceptable_indices
=
np
.
where
(
np
.
logical_and
(
np
.
greater
(
upper_bound
,
pseudo_h_values
),
np
.
greater
(
pseudo_h_values
,
lower_bound
)))[
0
]
if
acceptable_indices
.
size
>
0
:
worst_h_index
=
self
.
random_state
.
choice
(
acceptable_indices
)
return
worst_h_index
,
[
0
]
...
...
@@ -336,8 +357,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
possible_sols
=
[]
indices
=
[]
causes
=
[]
for
hypothese_index
,
hypothese
in
enumerate
(
y_kernel_matrix
.
transpose
()):
if
(
hypothese_index
not
in
self
.
chosen_columns_
or
self
.
twice_the_same
)
\
for
hypothese_index
,
hypothese
in
enumerate
(
y_kernel_matrix
.
transpose
()):
if
(
hypothese_index
not
in
self
.
chosen_columns_
or
self
.
twice_the_same
)
\
and
set
(
self
.
chosen_columns_
)
!=
{
hypothese_index
}
\
and
self
.
_is_not_too_wrong
(
hypothese
,
y
):
w
=
self
.
_solve_one_weight_min_c
(
hypothese
,
y
)
...
...
@@ -363,18 +386,26 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
No precalc because longer ; see the
"
derivee
"
latex document for more precision
"""
m
=
next_column
.
shape
[
0
]
zero_diag
=
np
.
ones
((
m
,
m
))
-
np
.
identity
(
m
)
weighted_previous_sum
=
np
.
multiply
(
y
,
self
.
previous_vote
.
reshape
((
m
,
1
)))
weighted_next_column
=
np
.
multiply
(
next_column
.
reshape
((
m
,
1
)),
self
.
example_weights
.
reshape
((
m
,
1
)))
weighted_previous_sum
=
np
.
multiply
(
y
,
self
.
previous_vote
.
reshape
((
m
,
1
)))
weighted_next_column
=
np
.
multiply
(
next_column
.
reshape
((
m
,
1
)),
self
.
example_weights
.
reshape
((
m
,
1
)))
self
.
B2
=
np
.
sum
(
weighted_next_column
**
2
)
self
.
B1
=
np
.
sum
(
2
*
weighted_next_column
*
weighted_previous_sum
)
self
.
B0
=
np
.
sum
(
weighted_previous_sum
**
2
)
M2
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_next_column
,
np
.
transpose
(
weighted_next_column
)),
zero_diag
))
M1
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_previous_sum
,
np
.
transpose
(
weighted_next_column
))
+
np
.
matmul
(
weighted_next_column
,
np
.
transpose
(
weighted_previous_sum
))
M2
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_next_column
,
np
.
transpose
(
weighted_next_column
)),
zero_diag
))
M1
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_previous_sum
,
np
.
transpose
(
weighted_next_column
))
+
np
.
matmul
(
weighted_next_column
,
np
.
transpose
(
weighted_previous_sum
))
,
zero_diag
))
M0
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_previous_sum
,
np
.
transpose
(
weighted_previous_sum
)),
zero_diag
))
M0
=
np
.
sum
(
np
.
multiply
(
np
.
matmul
(
weighted_previous_sum
,
np
.
transpose
(
weighted_previous_sum
)),
zero_diag
))
self
.
A2
=
self
.
B2
+
M2
self
.
A1
=
self
.
B1
+
M1
...
...
@@ -387,7 +418,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
if
C1
==
0
:
return
[
'
break
'
,
"
the derivate was constant
"
]
else
:
is_acceptable
,
sol
=
self
.
_analyze_solutions_one_weight
(
np
.
array
(
float
(
C0
)
/
C1
).
reshape
((
1
,
1
)))
is_acceptable
,
sol
=
self
.
_analyze_solutions_one_weight
(
np
.
array
(
float
(
C0
)
/
C1
).
reshape
((
1
,
1
)))
if
is_acceptable
:
return
np
.
array
([
sol
])
try
:
...
...
@@ -421,7 +453,8 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
def
_cbound
(
self
,
sol
):
"""
Computing the objective function
"""
return
1
-
(
self
.
A2
*
sol
**
2
+
self
.
A1
*
sol
+
self
.
A0
)
/
(
self
.
B2
*
sol
**
2
+
self
.
B1
*
sol
+
self
.
B0
)
/
self
.
n_total_examples
return
1
-
(
self
.
A2
*
sol
**
2
+
self
.
A1
*
sol
+
self
.
A0
)
/
(
self
.
B2
*
sol
**
2
+
self
.
B1
*
sol
+
self
.
B0
)
/
self
.
n_total_examples
def
_best_sol
(
self
,
sols
):
"""
Return the best min in the two possible sols
"""
...
...
@@ -441,8 +474,10 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
except
:
raise
filenames
=
[]
max_weight
=
max
([
np
.
max
(
examples_weights
)
for
examples_weights
in
self
.
example_weights_
])
min_weight
=
min
([
np
.
max
(
examples_weights
)
for
examples_weights
in
self
.
example_weights_
])
max_weight
=
max
([
np
.
max
(
examples_weights
)
for
examples_weights
in
self
.
example_weights_
])
min_weight
=
min
([
np
.
max
(
examples_weights
)
for
examples_weights
in
self
.
example_weights_
])
for
iterIndex
,
examples_weights
in
enumerate
(
self
.
example_weights_
):
r
=
np
.
array
(
examples_weights
)
theta
=
np
.
arange
(
self
.
n_total_examples
)
...
...
@@ -464,18 +499,19 @@ class ColumnGenerationClassifierQar(BaseEstimator, ClassifierMixin, BaseBoost):
imageio
.
mimsave
(
path
+
'
/weights.gif
'
,
images
,
duration
=
1.
/
2
)
import
shutil
shutil
.
rmtree
(
path
+
"
/gif_images
"
)
get_accuracy_graph
(
self
.
voter_perfs
,
self
.
__class__
.
__name__
,
directory
+
'
voter_perfs.png
'
,
"
Errors
"
)
interpretString
=
getInterpretBase
(
self
,
directory
,
"
QarBoost
"
,
self
.
weights_
,
self
.
break_cause
)
args_dict
=
dict
((
arg_name
,
str
(
self
.
__dict__
[
arg_name
]))
for
arg_name
in
self
.
printed_args_name_list
)
interpretString
+=
"
\n
\n
With arguments :
\n
"
+
u
'
\u2022
'
+
(
"
\n
"
+
u
'
\u2022
'
).
join
([
'
%s:
\t
%s
'
%
(
key
,
value
)
for
(
key
,
value
)
in
args_dict
.
items
()])
get_accuracy_graph
(
self
.
voter_perfs
,
self
.
__class__
.
__name__
,
directory
+
'
voter_perfs.png
'
,
"
Errors
"
)
interpretString
=
getInterpretBase
(
self
,
directory
,
"
QarBoost
"
,
self
.
weights_
,
self
.
break_cause
)
args_dict
=
dict
(
(
arg_name
,
str
(
self
.
__dict__
[
arg_name
]))
for
arg_name
in
self
.
printed_args_name_list
)
interpretString
+=
"
\n
\n
With arguments :
\n
"
+
u
'
\u2022
'
+
(
"
\n
"
+
u
'
\u2022
'
).
join
([
'
%s:
\t
%s
'
%
(
key
,
value
)
for
(
key
,
value
)
in
args_dict
.
items
()])
if
not
self
.
respected_bound
:
interpretString
+=
"
\n\n
The bound was not respected
"
return
interpretString
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment