Commit e35b35bf authored by Luc Giffon

code figures (Luc's thesis)

parent 17fa2b1f
1 merge request: !24 Resolve "non negative omp"
......@@ -15,7 +15,7 @@ lst_task_train_dev = ["coherence", "correlation"]
tasks = [
# "train_score",
# "dev_score",
"dev_score",
"test_score",
# "coherence",
# "correlation",
......@@ -109,8 +109,8 @@ def add_trace_from_df(df, fig, task, strat, stop_on_flat=False):
global GLOBAL_TRACE_TO_ADD_LAST
df.sort_values(by="forest_size", inplace=True)
df_groupby_forest_size = df.groupby(['forest_size'])
forest_sizes = list(df_groupby_forest_size["forest_size"].mean().values)
df_groupby_forest_size = df.groupby(['pruning_percent'])
forest_sizes = list(df_groupby_forest_size["pruning_percent"].mean().values)
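# Note: the variable keeps its historical name, but `forest_sizes` now holds
# mean pruning percentages (fraction of selected trees), which is what the
# "% Selected Trees" x-axis set further down expects.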
mean_value = df_groupby_forest_size[task].mean().values
std_value = df_groupby_forest_size[task].std().values
......@@ -132,7 +132,8 @@ def add_trace_from_df(df, fig, task, strat, stop_on_flat=False):
width=2
)
),
showlegend=False
name="Final NN-OMP",
showlegend=True
)
forest_sizes = forest_sizes[:index_flat]
......@@ -169,7 +170,21 @@ dct_metric_figure = {
"mean_squared_error": go.Figure()
}
def base_figures():
dct_gamma_by_dataset = {
"Boston": 5,
"Breast Cancer": 5,
"California Housing": 5,
"Diabetes": 5,
"Diamonds": 5,
"Kin8nm": 5,
"KR-VS-KP": 5,
"Spambase": 5,
"Steel Plates": 5,
"Gamma": 5,
"LFW Pairs": 5,
}
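# Presumably per-dataset RBF gamma values for the SVR trend fits below; all
# are currently 5 and the dict is only referenced by the commented-out
# svm.SVR(gamma=dct_gamma_by_dataset[data_name]) call in effect_of_weights_figure.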
def base_figures(skip_NN=False):
for task in tasks:
for data_name in datasets:
......@@ -183,40 +198,40 @@ def base_figures():
# all techniques #
##################
for strat in strategies:
if strat in lst_skip_strategy:
if strat in lst_skip_strategy or (skip_NN and "NN-OMP" in strat):
continue
if task == "negative-percentage-test-score":
if strat == "OMP":
df_strat = df_data[df_data["strategy"] == strat]
df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
df_groupby_forest_size = df_strat_wo_weights.groupby(['forest_size'])
forest_sizes = df_groupby_forest_size["forest_size"].mean().values
x_values = df_groupby_forest_size["negative-percentage"].mean().values
y_values = df_groupby_forest_size["test_score"].mean().values
# print(df_strat)
fig.add_trace(go.Scatter(x=x_values, y=y_values,
mode='markers',
name=strat,
# if task == "negative-percentage-test-score":
# if strat == "OMP":
# df_strat = df_data[df_data["strategy"] == strat]
# df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
# df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
#
# df_groupby_forest_size = df_strat_wo_weights.groupby(['forest_size'])
#
#
# forest_sizes = df_groupby_forest_size["forest_size"].mean().values
# x_values = df_groupby_forest_size["negative-percentage"].mean().values
# y_values = df_groupby_forest_size["test_score"].mean().values
# # print(df_strat)
# fig.add_trace(go.Scatter(x=x_values, y=y_values,
# mode='markers',
# name=strat,
# # color=forest_sizes,
# marker=dict(
# # size=16,
# # cmax=39,
# # cmin=0,
# color=forest_sizes,
marker=dict(
# size=16,
# cmax=39,
# cmin=0,
color=forest_sizes,
colorbar=dict(
title="Forest Size"
),
# colorscale="Viridis"
),
# marker=dict(color="rgb{}".format(dct_color_by_strategy[strat]))
))
continue
# colorbar=dict(
# title="Forest Size"
# ),
# # colorscale="Viridis"
# ),
# # marker=dict(color="rgb{}".format(dct_color_by_strategy[strat]))
# ))
#
# continue
df_strat = df_data[df_data["strategy"] == strat]
......@@ -252,8 +267,9 @@ def base_figures():
title = "{} {}".format(task, data_name)
yaxis_title = "% negative weights" if task == "negative-percentage" else dct_score_metric_fancy[score_metric_name]
xaxis_title = "% negative weights" if task == "negative-percentage-test-score" else "# Selected Trees"
xaxis_title = "% negative weights" if task == "negative-percentage-test-score" else "% Selected Trees"
if not skip_NN:
fig.add_trace(GLOBAL_TRACE_TO_ADD_LAST)
fig.update_layout(barmode='group',
# title=title,
......@@ -264,7 +280,7 @@ def base_figures():
size=24,
color="black"
),
# showlegend = False,
showlegend = False,
margin = dict(
l=1,
r=1,
......@@ -285,6 +301,9 @@ def base_figures():
)
)
# fig.show()
if skip_NN:
str_no_nn = " no nn"
title += str_no_nn
sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_")
filename = sanitize(title)
output_dir = out_dir / sanitize(task)
......@@ -375,14 +394,14 @@ def global_figure():
# fig.show()
def weights_wrt_size():
lst_skip_data_weight_effect = ["Gamma", "KR-VS-KP", "Steel Plates"]
# lst_skip_data_weight_effect = ["Gamma", "KR-VS-KP", "Steel Plates"]
lst_skip_data_weight_effect = ["Gamma"]
fig = go.Figure()
for data_name in datasets:
# if data_name in lst_skip_data_weight_effect:
# continue
if data_name in lst_skip_data_weight_effect:
continue
df_data = df_results[df_results["dataset"] == data_name]
score_metric_name = df_data["score_metric"].values[0]
......@@ -401,7 +420,7 @@ def weights_wrt_size():
y_values = df_groupby_forest_size["negative-percentage"].mean().values
y_values = (y_values - np.min(y_values)) / (np.max(y_values) - np.min(y_values))
x_values = np.around(df_groupby_forest_size["pruning_percent"].mean().values, decimals=1)
x_values = df_groupby_forest_size["pruning_percent"].mean().values
# x_values = (x_values - np.min(x_values)) / (np.max(x_values) - np.min(x_values))
# if score_metric_name == "mean_squared_error":
......@@ -410,8 +429,8 @@ def weights_wrt_size():
lin_reg = svm.SVR(gamma=10)
lin_reg.fit(x_values[:, np.newaxis], y_values)
xx = np.linspace(0, 1)
yy = lin_reg.predict(xx[:, np.newaxis])
# xx = np.linspace(0, 1)
yy = lin_reg.predict(x_values[:, np.newaxis])
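# The SVR trend (gamma=10) is now evaluated at the observed pruning
# percentages themselves rather than on a fixed np.linspace(0, 1) grid, so the
# line trace added below shares its x positions with the scatter points.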
# print(df_strat)
fig.add_trace(go.Scatter(x=x_values, y=y_values,
......@@ -430,7 +449,7 @@ def weights_wrt_size():
),
# marker=dict(color="rgb{}".format(dct_color_by_strategy[strat]))
))
fig.add_trace(go.Scatter(x=xx, y=yy,
fig.add_trace(go.Scatter(x=x_values, y=yy,
mode='lines',
name=strat,
# color=forest_sizes,
......@@ -452,8 +471,8 @@ def weights_wrt_size():
title = "{}".format("weight wrt size")
fig.update_layout(barmode='group',
title=title,
xaxis_title="Pruning percentage",
# title=title,
xaxis_title="% Selected Trees",
yaxis_title="Standardized % negative weights",
font=dict(
# family="Courier New, monospace",
......@@ -464,8 +483,8 @@ def weights_wrt_size():
margin=dict(
l=1,
r=1,
b=1,
t=1,
b=3,
t=10,
# pad=4
),
legend=dict(
......@@ -488,12 +507,13 @@ def weights_wrt_size():
fig.write_image(str((output_dir / filename).absolute()) + ".png")
def effect_of_weights_figure():
lst_skip_data_weight_effect = ["Gamma", "KR-VS-KP", "Steel Plates"]
lst_skip_data_weight_effect = ["Gamma"]
# lst_skip_data_weight_effect = ["Gamma", "KR-VS-KP", "Steel Plates"]
fig = go.Figure()
for data_name in datasets:
#
# if data_name in lst_skip_data_weight_effect:
# continue
df_data = df_results[df_results["dataset"] == data_name]
......@@ -506,29 +526,31 @@ def effect_of_weights_figure():
df_strat = df_data[df_data["strategy"] == strat]
df_strat = df_strat[df_strat["subset"] == "train+dev/train+dev"]
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
df_strat_wo_weights.sort_values(by="pruning_percent", inplace=True)
df_groupby_forest_size = df_strat_wo_weights.groupby(['forest_size'])
x_values = df_groupby_forest_size["negative-percentage"].mean().values
x_values = (x_values - np.min(x_values)) / (np.max(x_values) - np.min(x_values))
y_values = df_groupby_forest_size["test_score"].mean().values
if score_metric_name == "mean_squared_error":
y_values = 1/y_values
x_values = x_values[3:]
y_values = y_values[3:]
x_values = (x_values - np.min(x_values)) / (np.max(x_values) - np.min(x_values))
y_values = (y_values - np.min(y_values)) / (np.max(y_values) - np.min(y_values))
bins = np.histogram(x_values)[1]
indices_x_values = np.digitize(x_values, bins)-1
mean_val = np.empty(len(bins)-1)
for idx_group in range(len(bins) - 1):
mean_val[idx_group] = np.mean(y_values[indices_x_values == idx_group])
# bins = np.histogram(x_values)[1]
# indices_x_values = np.digitize(x_values, bins)-1
# mean_val = np.empty(len(bins)-1)
# for idx_group in range(len(bins) - 1):
# mean_val[idx_group] = np.mean(y_values[indices_x_values == idx_group])
# lin_reg = LinearRegression()
lin_reg = svm.SVR(gamma=5)
# lin_reg = svm.SVR(gamma=dct_gamma_by_dataset[data_name])
lin_reg = svm.SVR(gamma=1.)
lin_reg.fit(x_values[:, np.newaxis], y_values)
xx = np.linspace(0, 1)
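# x_values has been min-max standardized to [0, 1] just above, so evaluating
# the SVR trend (gamma=1.) on a fixed np.linspace(0, 1) grid spans the whole
# data range here.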
......@@ -540,6 +562,7 @@ def effect_of_weights_figure():
fig.add_trace(go.Scatter(x=x_values, y=y_values,
mode='markers',
name=strat,
showlegend=False,
# color=forest_sizes,
marker=dict(
# size=16,
......@@ -576,15 +599,15 @@ def effect_of_weights_figure():
title = "{}".format("negative weights effect")
fig.update_layout(barmode='group',
title=title,
xaxis_title="Standardized % negative weights",
yaxis_title="Normalized Performance",
# title=title,
xaxis_title="Standardized % Negative Weights",
yaxis_title="Standardized Performance",
font=dict(
# family="Courier New, monospace",
size=24,
color="black"
),
showlegend = False,
# showlegend = False,
margin=dict(
l=1,
r=1,
......@@ -626,7 +649,8 @@ if __name__ == "__main__":
strategies = set(df_results["strategy"].values)
subsets = set(df_results["subset"].values)
# base_figures()
effect_of_weights_figure()
weights_wrt_size()
for skip_nn in [True, False]:
base_figures(skip_nn)
# effect_of_weights_figure()
# weights_wrt_size()
# global_figure()
......@@ -33,18 +33,32 @@ dct_score_metric_best_fct = {
"mean_squared_error": np.argmin
}
# dct_data_short = {
# "Spambase": "Spambase",
# "Diamonds": "Diamonds",
# "Diabetes": "Diabetes",
# "Steel Plates": "Steel P.",
# "KR-VS-KP": "KR-VS-KP",
# "Breast Cancer": "Breast C.",
# "Kin8nm": "Kin8nm",
# "LFW Pairs": "LFW P.",
# "Gamma": "Gamma",
# "California Housing": "California H.",
# "Boston": "Boston",
# }
dct_data_short = {
"Spambase": "Spambase",
"Diamonds": "Diamonds",
"Diabetes": "Diabetes",
"Steel Plates": "Steel P.",
"KR-VS-KP": "KR-VS-KP",
"Breast Cancer": "Breast C.",
"Kin8nm": "Kin8nm",
"Spambase": "Sp. B.",
"Diamonds": "Diam.",
"Diabetes": "Diab.",
"Steel Plates": "St. P.",
"KR-VS-KP": "KR-KP",
"Breast Cancer": "B. C.",
"Kin8nm": "Kin.",
"LFW Pairs": "LFW P.",
"Gamma": "Gamma",
"California Housing": "California H.",
"Boston": "Boston",
"Gamma": "Gam.",
"California Housing": "C. H.",
"Boston": "Bos.",
}
dct_data_best = {
......@@ -101,7 +115,7 @@ def get_max_from_df(df, best_fct):
if __name__ == "__main__":
load_dotenv(find_dotenv('.env'))
dir_name = "bolsonaro_models_25-03-20"
dir_name = "bolsonaro_models_29-03-20_v3_2"
dir_path = Path(os.environ["project_dir"]) / "results" / dir_name
out_dir = Path(os.environ["project_dir"]) / "reports/figures" / dir_name
......@@ -155,29 +169,19 @@ if __name__ == "__main__":
if "OMP" in strat:
###########################
# processing with weights #
# processing without weights #
###########################
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
if data_name == "Boston" and subset_name == "train+dev/train+dev":
df_strat_wo_weights = df_strat_wo_weights[df_strat_wo_weights["forest_size"] < 400]
dct_data_lst_tpl_results[data_name].append(get_max_from_df(df_strat_wo_weights, dct_score_metric_best_fct[score_metric_name]))
if strat not in lst_strats: lst_strats.append(strat)
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True]
if "OMP" in strat and subset_name == "train/dev":
continue
elif "Random" not in strat and subset_name == "train/dev":
continue
strat_woweights = "{} w/o weights".format(strat)
dct_data_lst_tpl_results[data_name].append(get_max_from_df(df_strat_wo_weights, dct_score_metric_best_fct[score_metric_name]))
if strat_woweights not in lst_strats: lst_strats.append(strat_woweights)
#################################
# general wo_weights processing #
#################################
if "Random" in strat:
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
else:
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True]
if "OMP" in strat:
strat = "{} w/o weights".format(strat)
dct_data_lst_tpl_results[data_name].append(get_max_from_df(df_strat_wo_weights, dct_score_metric_best_fct[score_metric_name]))
if strat not in lst_strats: lst_strats.append(strat)
......@@ -219,7 +223,8 @@ if __name__ == "__main__":
lst_tpl_results = dct_data_lst_tpl_results[data_name]
data_name_short = dct_data_short[data_name]
s_data_tmp = "{}".format(data_name_short)
s_data_tmp += "({})".format(dct_data_metric[data_name])
# add metric in parenthesis
# s_data_tmp += "({})".format(dct_data_metric[data_name])
# s_data_tmp = "\\texttt{{ {} }}".format(data_name_short)
# s_data_tmp = "\\multicolumn{{2}}{{c}}{{ \\texttt{{ {} }} }}".format(data_name)
s_data_tmp += " "*(nb_spaces - len(data_name_short))
......@@ -292,8 +297,8 @@ if __name__ == "__main__":
print("\\midrule")
if idx_lin == 6:
print("\\midrule")
if lst_data_ordered[idx_lin-1] == "Diamonds":
print("%", end="")
# if lst_data_ordered[idx_lin-1] == "Diamonds":
# print("%", end="")
line_print = " ".join(list(lin))
line_print = line_print.rstrip(" &") + "\\\\"
print(line_print)
......
import copy
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import os
import pandas as pd
import numpy as np
from pprint import pprint
import plotly.graph_objects as go
import plotly.io as pio
from collections import defaultdict
lst_skip_strategy = ["None", "OMP Distillation", "OMP Distillation w/o weights"]
lst_skip_task = ["correlation", "coherence"]
# lst_skip_task = []
lst_skip_subset = ["train/dev"]
# lst_skip_subset = []
tasks = [
# "train_score",
# "dev_score",
"test_score",
# "coherence",
# "correlation"
]
dct_score_metric_fancy = {
"accuracy_score": "% Accuracy",
"mean_squared_error": "MSE"
}
dct_score_metric_best_fct = {
"accuracy_score": np.argmax,
"mean_squared_error": np.argmin
}
# dct_data_short = {
# "Spambase": "Spambase",
# "Diamonds": "Diamonds",
# "Diabetes": "Diabetes",
# "Steel Plates": "Steel P.",
# "KR-VS-KP": "KR-VS-KP",
# "Breast Cancer": "Breast C.",
# "Kin8nm": "Kin8nm",
# "LFW Pairs": "LFW P.",
# "Gamma": "Gamma",
# "California Housing": "California H.",
# "Boston": "Boston",
# }
dct_data_short = {
"Spambase": "Sp. B.",
"Diamonds": "Diam.",
"Diabetes": "Diab.",
"Steel Plates": "St. P.",
"KR-VS-KP": "KR-KP",
"Breast Cancer": "B. C.",
"Kin8nm": "Kin.",
"LFW Pairs": "LFW P.",
"Gamma": "Gam.",
"California Housing": "C. H.",
"Boston": "Bos.",
}
dct_data_best = {
"Spambase": np.max,
"Diamonds": np.min,
"Diabetes": np.min,
"Steel Plates": np.max,
"KR-VS-KP": np.max,
"Breast Cancer": np.max,
"Kin8nm": np.min,
"LFW Pairs": np.max,
"Gamma": np.max,
"California Housing": np.min,
"Boston": np.min,
}
dct_data_metric = {
"Spambase": "Acc.",
"Diamonds": "MSE",
"Diabetes": "MSE",
"Steel Plates": "Acc.",
"KR-VS-KP": "Acc.",
"Breast Cancer": "Acc.",
"Kin8nm": "MSE",
"LFW Pairs": "Acc.",
"Gamma": "Acc.",
"California Housing": "MSE",
"Boston": "MSE",
}
def get_max_from_df(df, best_fct):
nb_to_consider = 10
df.sort_values(by="forest_size", inplace=True)
df_groupby_forest_size = df.groupby(['forest_size'])
forest_sizes = list(df_groupby_forest_size["forest_size"].mean().values)[:nb_to_consider]
mean_value = df_groupby_forest_size[task].mean().values[:nb_to_consider]
std_value = df_groupby_forest_size[task].std().values[:nb_to_consider]
try:
argmax = best_fct(mean_value)
except:
print("no results", strat, data_name, task, subset_name)
return -1, -1, -1
max_mean = mean_value[argmax]
max_std = std_value[argmax]
max_forest_size = forest_sizes[argmax]
return max_forest_size, max_mean, max_std
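# Minimal usage sketch (hypothetical filtering; relies on the module-level
# `task` variable being set, e.g. task = "test_score"):
#   df_omp_boston = df_results[(df_results["strategy"] == "OMP")
#                              & (df_results["dataset"] == "Boston")]
#   size, mean_score, std_score = get_max_from_df(df_omp_boston, np.argmin)
# np.argmin is passed for MSE-like metrics and np.argmax for accuracy, as
# encoded in dct_score_metric_best_fct above.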
if __name__ == "__main__":
load_dotenv(find_dotenv('.env'))
dir_name = "bolsonaro_models_29-03-20_v3_2"
dir_path = Path(os.environ["project_dir"]) / "results" / dir_name
out_dir = Path(os.environ["project_dir"]) / "reports/figures" / dir_name
input_dir_file = dir_path / "results.csv"
df_results = pd.read_csv(open(input_dir_file, 'rb'))
datasets = set(df_results["dataset"].values)
strategies = sorted(list(set(df_results["strategy"].values) - set(lst_skip_strategy)))
subsets = set(df_results["subset"].values)
r"""
\begin{table}[!h]
\centering
\begin{tabular}{l{}}
\toprule
\multicolumn{1}{c}{\textbf{Dataset}} & \textbf{Data dim.} $\datadim$ & \textbf{\# classes} & \textbf{Train size} $\nexamples$ & \textbf{Test size} $\nexamples'$ \\ \midrule
\texttt{MNIST}~\cite{lecun-mnisthandwrittendigit-2010} & 784 & 10 & 60 000 & 10 000 \\ %\hline
\texttt{Kddcup99}~\cite{Dua:2019} & 116 & 23 & 4 893 431 & 5 000 \\
\bottomrule
\end{tabular}
\caption{Main features of the datasets. Discrete, unordered attributes for dataset Kddcup99 have been encoded as one-hot attributes.}
\label{table:data}
\end{table}
"""
for task in tasks:
if task in lst_skip_task:
continue
dct_data_lst_tpl_results = defaultdict(lambda: [])
lst_strats = []
for data_name in datasets:
df_data = df_results[df_results["dataset"] == data_name]
score_metric_name = df_data["score_metric"].values[0]
for subset_name in subsets:
if subset_name in lst_skip_subset:
continue
df_subset = df_data[df_data["subset"] == subset_name]
##################
# all techniques #
##################
for strat in strategies:
if strat in lst_skip_strategy:
continue
df_strat = df_subset[df_subset["strategy"] == strat]
if "OMP" in strat:
###########################
# processing without weights #
###########################
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == True]
strat_woweights = "{} w/o weights".format(strat)
dct_data_lst_tpl_results[data_name].append(get_max_from_df(df_strat_wo_weights, dct_score_metric_best_fct[score_metric_name]))
if strat_woweights not in lst_strats: lst_strats.append(strat_woweights)
#################################
# general wo_weights processing #
#################################
df_strat_wo_weights = df_strat[df_strat["wo_weights"] == False]
dct_data_lst_tpl_results[data_name].append(get_max_from_df(df_strat_wo_weights, dct_score_metric_best_fct[score_metric_name]))
if strat not in lst_strats: lst_strats.append(strat)
title = "{} {} {}".format(task, data_name, subset_name)
# fig.show()
sanitize = lambda x: x.replace(" ", "_").replace("/", "_").replace("+", "_")
filename = sanitize(title)
# output_dir = out_dir / sanitize(subset_name) / sanitize(task)
# output_dir.mkdir(parents=True, exist_ok=True)
# fig.write_image(str((output_dir / filename).absolute()) + ".png")
# pprint(dct_data_lst_tpl_results)
lst_data_ordered = [
"Diamonds",
"Diabetes",
"Kin8nm",
"California Housing",
"Boston",
"Spambase",
"Steel Plates",
"KR-VS-KP",
"Breast Cancer",
"LFW Pairs",
"Gamma"
]
arr_results_str = np.empty((len(lst_strats)+1, len(datasets) + 1 ), dtype="object")
nb_spaces = 25
dct_strat_str = defaultdict(lambda: [])
s_empty = "{}" + " "*(nb_spaces-2) + " & "
arr_results_str[0][0] = s_empty
# arr_results_str[0][1] = s_empty
for idx_data, data_name in enumerate(lst_data_ordered):
lst_tpl_results = dct_data_lst_tpl_results[data_name]
data_name_short = dct_data_short[data_name]
# s_data_tmp = "{}".format(data_name_short)
# add metric in parenthesis
# s_data_tmp += "({})".format(dct_data_metric[data_name])
# s_data_tmp = "\\texttt{{ {} }}".format(data_name_short)
s_data_tmp = "\\multicolumn{{2}}{{c}}{{ \\texttt{{ {} }} }}".format(data_name)
s_data_tmp += " "*(nb_spaces - len(s_data_tmp))
s_data_tmp += " & "
arr_results_str[0, idx_data + 1] = s_data_tmp
array_results = np.array(lst_tpl_results)
best_result_perf = dct_data_best[data_name](array_results[:, 1])
best_result_perf_indexes = np.argwhere(array_results[:, 1] == best_result_perf)
copye_array_results = copy.deepcopy(array_results)
if dct_data_best[data_name] is np.min:
copye_array_results[best_result_perf_indexes] = np.inf
else:
copye_array_results[best_result_perf_indexes] = -np.inf
best_result_perf_2 = dct_data_best[data_name](copye_array_results[:, 1])
best_result_perf_indexes_2 = np.argwhere(copye_array_results[:, 1] == best_result_perf_2)
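# Second-best lookup: the best entries are masked with +/- inf in a deep copy
# so that applying the same min/max again yields the runner-up, which gets
# underlined in the LaTeX output below.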
best_result_prune = np.min(array_results[:, 0])
best_result_prune_indexes = np.argwhere(array_results[:, 0] == best_result_prune)
for idx_strat, tpl_results in enumerate(array_results):
str_strat = "\\texttt{{ {} }}".format(lst_strats[idx_strat])
# str_strat = "\\multicolumn{{2}}{{c}}{{ \\texttt{{ {} }} }}".format(lst_strats[idx_strat])
# str_strat = "\\multicolumn{{2}}{{c}}{{ \\thead{{ \\texttt{{ {} }} }} }}".format("}\\\\ \\texttt{".join(lst_strats[idx_strat].split(" ", 1)))
# str_strat = "\\multicolumn{{2}}{{c}}{{ \\thead{{ {} }} }} ".format("\\\\".join(lst_strats[idx_strat].split(" ", 1)))
str_strat += " " * (nb_spaces - len(str_strat)) + " & "
arr_results_str[idx_strat+1, 0] = str_strat
# str_header = " {} & #tree &".format(dct_data_metric[data_name])
# arr_results_str[idx_strat + 1, 1] = str_header
best_forest_size = tpl_results[0]
best_mean = tpl_results[1]
best_std = tpl_results[2]
if dct_data_metric[data_name] == "Acc.":
str_perf = "{:.2f}\\%".format(best_mean * 100)
else:
str_perf = "{:.3E}".format(best_mean)
str_prune = "{:d}".format(int(best_forest_size))
if idx_strat in best_result_perf_indexes:
# str_formating = "\\textbf{{ {} }}".format(str_result_loc)
str_formating = "\\textbf[{}]"
# str_formating = "\\textbf{{ {:.3E} }}(\\~{:.3E})".format(best_mean, best_std)
elif idx_strat in best_result_perf_indexes_2:
str_formating = "\\underline[{}]"
# str_formating = "\\underline{{ {:.3E} }}(\\~{:.3E})".format(best_mean, best_std)
else:
str_formating = "{}"
# str_formating = "{:.3E}(~{:.3E})".format(best_mean, best_std)
if idx_strat in best_result_prune_indexes:
str_formating = str_formating.format("\\textit[{}]")
# str_prune = " & \\textit{{ {:d} }}".format(int(best_forest_size))
# else:
# str_prune = " & {:d}".format(int(best_forest_size))
str_result = str_formating.format(str_perf) + " & " + str_formating.format(str_prune)
str_result += " "*(nb_spaces - len(str_result))
str_result = str_result.replace("[", "{").replace("]", "}")
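# LaTeX markup is assembled with square-bracket placeholders (\textbf[{}],
# \underline[{}], \textit[{}]) so that the nested str.format calls do not
# collide with literal LaTeX braces; the brackets are turned back into real
# braces only once all formatting is done.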
arr_results_str[idx_strat+1, idx_data+1] = str_result + " & "
dct_strat_str[lst_strats[idx_strat]].append(str_result)
# arr_results_str = arr_results_str.T
arr_results_str_classif = arr_results_str[:, 6:]
arr_results_str_classif = np.hstack([arr_results_str[:, 0:1], arr_results_str_classif])
arr_results_str_reg = arr_results_str[:, :6]
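# lst_data_ordered lists the 5 regression datasets first, so column 0
# (strategy labels) plus columns 1..5 form the regression table and columns
# 6.. the classification table; each block is printed as its own tabular below.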
for arr_results_str in [arr_results_str_classif, arr_results_str_reg]:
print(r"\toprule")
for idx_lin, lin in enumerate(arr_results_str):
if idx_lin == 1:
print("\\midrule")
# if idx_lin == 6:
# print("\\midrule")
# if lst_data_ordered[idx_lin-1] == "Diamonds":
# print("%", end="")
line_print = " ".join(list(lin))
line_print = line_print.rstrip(" &") + "\\\\"
print(line_print)
print(r"\bottomrule")
# s_data = s_data.rstrip(" &") + "\\\\"
# print(s_data)
# for strat, lst_str_results in dct_strat_str.items():
# str_strat = "\\texttt{{ {} }}".format(strat)
# str_strat += " "*(nb_spaces - len(str_strat))
# str_strat += " & " + " & ".join(lst_str_results)
# str_strat += "\\\\"
# print(str_strat)
# exit()
......@@ -60,7 +60,7 @@ dct_dataset_fancy = {
}
dct_dataset_base_forest_size = {
"boston": 1000,
"boston": 100,
"breast_cancer": 1000,
"california_housing": 1000,
"diabetes": 108,
......@@ -132,7 +132,7 @@ if __name__ == "__main__":
dct_results["wo_weights"].append(bool_wo_weights)
dct_results["base_forest_size"].append(dct_dataset_base_forest_size[dataset])
pruning_percent = forest_size / dct_dataset_base_forest_size[dataset]
dct_results["pruning_percent"].append(np.round(pruning_percent, decimals=1))
dct_results["pruning_percent"].append(np.round(pruning_percent, decimals=2))
dct_nb_val_scores = {}
......