Commit abb7717f authored by Luc Giffon

graph drawing scripts

parent fd687993
import os
import pathlib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skluc.utils import logger
matplotlib.rcParams.update({'font.size': 14})
pd.set_option('display.width', 1000)
if __name__ == "__main__":
    # Files parameters
    ###################
    FILENAME = "gathered_results.csv"
    DIRNAME = "/home/luc/Resultats/Deepstrom/jully_2018/cifar100_few_data_nodropout"
    filepath = os.path.join(DIRNAME, FILENAME)

    # figure parameters
    ###################
    min_acc = 0.00
    max_acc = 1.05
    # max_acc = 1.0
    linewidth = 0.9
    output_conv_dim = 512
    nb_classes = 10

    real_nys_marker = "s"
    learned_nys_marker = "x"
    linearity_color = "g"

    dense_marker = "v"
    dense_color = "r"

    deepfried_marker = "8"
    deepfried_color = "b"

    d_translate_kernel = {
        "linear": "Linear",
        "chi2_cpd": "Chi2",
        "rbf": "Gaussian",
        "chi2_exp_cpd": "Exp Chi2"
    }

    ######################
    # df initialization
    ######################
    batch_size = 64  # always the same batch size
    field_names = ["method_name",
                   "accuracy_val",
                   "accuracy_test",
                   "runtime",
                   "number_epoch",
                   "batch_size",
                   "repr_dim",
                   "two_layers_dense",
                   "kernel_deepstrom",
                   "gamma_kernel",
                   "constante_sigmoid",
                   "nb_layer_deepfried",
                   "subsample_size",
                   "validation_size",
                   "seed",
                   "non_linearity",
                   "real_nystrom",
                   "repr_quality",
                   "train_size",
                   "dropout",
                   "dataset"
                   ]

    df = pd.read_csv(filepath, names=field_names)
    # df = df[df["accuracy_val"] != 'None']
    df = df.apply(pd.to_numeric, errors="ignore")

    method_names = set(df["method_name"].values)
    logger.debug("Compared network types are: {}".format(method_names))
    datasets = set(df["dataset"].values)
    logger.debug("datasets: {}".format(datasets))
    kernel_names = set(df["kernel_deepstrom"].values)
    kernel_names.remove("None")
    # kernel_names.remove("laplacian")
    logger.debug("Kernel functions are: {}".format(kernel_names))
    seed_values = set(df["seed"].values)
    logger.debug("seed values: {}".format(seed_values))
    train_sizes = set(df["train_size"])
    logger.debug("train sizes: {}".format(train_sizes))
    dropout_values = set(df["dropout"].values)
    logger.debug("dropout values: {}".format(dropout_values))
    ###########################
    # df processing
    ###########################
    for h, DATANAME in enumerate(datasets):
        means_deepstrom = {}
        df_dataname = df[df["dataset"] == DATANAME]
        for t_size in sorted(list(train_sizes)):
            df_tsize = df_dataname[df_dataname["train_size"] == t_size]
            for drop_val in dropout_values:
                df_drop = df_tsize[df_tsize["dropout"] == drop_val]

                # plot deepstrom
                # ==============
                # .copy() avoids pandas SettingWithCopyWarning when columns are reassigned below
                df_deepstrom = df_drop[df_drop["method_name"] == "deepstrom"].copy()
                df_deepstrom["subsample_size"] = df_deepstrom["subsample_size"].astype(int)
                df_deepstrom_sort = df_deepstrom.sort_values(by=["subsample_size"])

                for i, k_name in enumerate(sorted(kernel_names)):
                    df_deepstrom_sort_kernel = df_deepstrom_sort[df_deepstrom_sort["kernel_deepstrom"] == k_name]
                    # if k_name != "rbf" and k_name != "chi2_exp_cpd":  # if there is no need for sigma value
                    #     lst_df_deepstrom_kernels = [df_deepstrom_sort_kernel]
                    # else:
                    #     lst_df_deepstrom_kernels = []  # use a list for the possible multiple sigma values
                    #     sigma_values = set(df_deepstrom_sort_kernel["gamma_kernel"].values)
                    #     sigma_values = list(sigma_values)
                    #     logger.debug("sigma values: {}".format(sigma_values))
                    #     for sig_val in sigma_values:
                    #         df_deepstrom_sort_kernel_sigma = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["gamma_kernel"] == sig_val]
                    #         lst_df_deepstrom_kernels.append(df_deepstrom_sort_kernel_sigma)
                    # for j, df_deepstrom_kernel in enumerate(lst_df_deepstrom_kernels):
                    f, ax = plt.subplots()
                    # non_lin_dfs = {
                    #     "linear": df_deepstrom_kernel[df_deepstrom_kernel["non_linearity"] == "None"],
                    # }

                    # get the results of learned nystrom
                    nys_sizes = set(df_deepstrom_sort_kernel["subsample_size"].values)
                    logger.debug("Nystrom possible sizes are: {}".format(nys_sizes))
                    df_deepstrom_kernel_w = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["real_nystrom"] == False]
                    np_deepstrom_kernel_w_mean_accuracy_test = np.mean(np.array([
                        list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_deepstrom_kernel_w_std_accuracy_test = np.std(np.array([
                        list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_param_nbr_deepstrom_kernel_w = (
                        np.square(np.array(sorted(nys_sizes))) +         # m x m
                        np.array(sorted(nys_sizes)) * output_conv_dim +  # m x d
                        np.array(sorted(nys_sizes)) * nb_classes         # m x c
                    )
                    ax.errorbar(np_param_nbr_deepstrom_kernel_w,
                                np_deepstrom_kernel_w_mean_accuracy_test,
                                np_deepstrom_kernel_w_std_accuracy_test,
                                marker=learned_nys_marker, color=linearity_color,
                                label=r" Adaptive-$\phi_{nys}$",
                                linestyle="None", capsize=3)

                    # get the results of vanilla nystrom
                    df_deepstrom_kernel_k = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["real_nystrom"]]
                    if len(df_deepstrom_kernel_k):
                        np_deepstrom_kernel_k_mean_accuracy_test = np.mean(np.array([
                            list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"])
                            for seed_v in seed_values
                        ]), axis=0)
                        np_deepstrom_kernel_k_std_accuracy_test = np.std(np.array([
                            list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"])
                            for seed_v in seed_values
                        ]), axis=0)
                        np_param_nbr_deepstrom_kernel_k = (
                            np.square(np.array(sorted(nys_sizes))) +         # m x m
                            np.array(sorted(nys_sizes)) * output_conv_dim +  # m x d
                            np.array(sorted(nys_sizes)) * nb_classes         # m x c
                        )
                        ax.errorbar(np_param_nbr_deepstrom_kernel_k,
                                    np_deepstrom_kernel_k_mean_accuracy_test,
                                    np_deepstrom_kernel_k_std_accuracy_test,
                                    marker=real_nys_marker, color=linearity_color,
                                    label=r"$\phi_{nys}$",
                                    linestyle="None", capsize=3)

                    # plot dense
                    # ==========
                    df_dense = df_drop[df_drop["method_name"] == "dense"]
                    df_dense = df_dense[df_dense["train_size"] == t_size].copy()
                    repr_dim = set(df_dense["repr_dim"].values)
                    logger.debug("Tested representation dimensions are: {}".format(repr_dim))
                    df_dense["repr_dim"] = df_dense["repr_dim"].astype(int)
                    df_dense = df_dense.sort_values(by=["repr_dim"])
                    np_dense_mean_accuracy_test = np.mean(np.array([
                        list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_dense_std_accuracy_test = np.std(np.array([
                        list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    ax.errorbar(
                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dim +
                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes,
                        np_dense_mean_accuracy_test,
                        np_dense_std_accuracy_test,
                        color=dense_color,
                        marker=dense_marker,
                        label=r"$\phi_{nn}$", capsize=3, linestyle="None")

                    # plot deepfried
                    # ==============
                    df_deepfried = df_drop[df_drop["method_name"] == "deepfriedconvnet"].copy()
                    df_deepfried["nb_layer_deepfried"] = df_deepfried["nb_layer_deepfried"].astype(int)
                    df_deepfried = df_deepfried.sort_values(by=["nb_layer_deepfried"])
                    nb_layers_deepfried = set(df_deepfried["nb_layer_deepfried"].values)
                    logger.debug("nb layers deepfried: {}".format(nb_layers_deepfried))
                    np_deepfried_mean_accuracy_test = []
                    np_deepfried_std_accuracy_test = []
                    for l_nb in sorted(nb_layers_deepfried):
                        df_deepfried_stack = df_deepfried[df_deepfried["nb_layer_deepfried"] == l_nb]
                        np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"]))
                        np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"]))
                    ax.errorbar(
                        # sorted() keeps the x order consistent with the sorted means/stds above
                        [(output_conv_dim * 3 + output_conv_dim * nb_classes) * i for i in sorted(nb_layers_deepfried)],
                        np_deepfried_mean_accuracy_test,
                        np_deepfried_std_accuracy_test,
                        color=deepfried_color,
                        marker=deepfried_marker,
                        linestyle="None",
                        label=r" Adaptive-$\phi_{ff}$", capsize=3)

                    ax.set_ylim(min_acc, max_acc)
                    ax.set_ylabel("Accuracy")
                    ax.set_xticks([1e4, 1e5, 1e6])
                    ax.set_xlabel("symlog(# Parameters)")
                    ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=4)
                    ax.set_xticklabels([1e4, 1e5, 1e6])
                    # else:
                    #     ax.set_xticklabels([])
                    ax.set_xscale("symlog")

                    ax_twin = ax.twiny()
                    ax_twin.set_xscale("symlog")
                    ax_twin.set_xlim(ax.get_xlim())
                    ax_twin.set_xticks(np_param_nbr_deepstrom_kernel_w)
                    # if i == 0:
                    ax_twin.set_xlabel("Subsample Size")
                    ax.set_title(
                        "{} Kernel - {} - Train size: {} - Not Drop prob: {}".format(d_translate_kernel[k_name],
                                                                                     DATANAME, t_size, drop_val),
                        y=1.2)
                    ax_twin.set_xticklabels(sorted(set(df_deepstrom_kernel_w["subsample_size"])))
                    # else:
                    #     ax.set_title("{} Kernel - {} - Train size: {}".format(d_translate_kernel[k_name], DATANAME, t_size))
                    #     ax_twin.set_xticklabels([])

                    f.set_size_inches(8, 6)
                    f.tight_layout()
                    f.subplots_adjust(bottom=0.3)

                    base_out_dir = "/home/luc/PycharmProjects/deepFriedConvnets/main/experiments/graph_drawing/paper/cifar100/images"
                    out_dir_dataset = os.path.join(base_out_dir, str(DATANAME))
                    out_name = "acc_param_tsize_{}_{}_{}".format(t_size,
                                                                 k_name,
                                                                 str(drop_val).replace(".", "-"))
                    pathlib.Path(out_dir_dataset).mkdir(parents=True, exist_ok=True)
                    out_path = os.path.join(out_dir_dataset, out_name)
                    f.savefig(out_path)
Second file in the commit: the same plotting script, differing only in the input directory (DIRNAME) and the output directory (base_out_dir).

import os
import pathlib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skluc.utils import logger
matplotlib.rcParams.update({'font.size': 14})
pd.set_option('display.width', 1000)
if __name__ == "__main__":
    # Files parameters
    ###################
    FILENAME = "gathered_results.csv"
    DIRNAME = "/home/luc/Resultats/Deepstrom/june_2018/svhn_mnist_cifar_very_big/raw"
    filepath = os.path.join(DIRNAME, FILENAME)

    # figure parameters
    ###################
    min_acc = 0.00
    max_acc = 1.05
    # max_acc = 1.0
    linewidth = 0.9
    output_conv_dim = 512
    nb_classes = 10

    real_nys_marker = "s"
    learned_nys_marker = "x"
    linearity_color = "g"

    dense_marker = "v"
    dense_color = "r"

    deepfried_marker = "8"
    deepfried_color = "b"

    d_translate_kernel = {
        "linear": "Linear",
        "chi2_cpd": "Chi2",
        "rbf": "Gaussian",
        "chi2_exp_cpd": "Exp Chi2"
    }

    ######################
    # df initialization
    ######################
    batch_size = 64  # always the same batch size
    field_names = ["method_name",
                   "accuracy_val",
                   "accuracy_test",
                   "runtime",
                   "number_epoch",
                   "batch_size",
                   "repr_dim",
                   "two_layers_dense",
                   "kernel_deepstrom",
                   "gamma_kernel",
                   "constante_sigmoid",
                   "nb_layer_deepfried",
                   "subsample_size",
                   "validation_size",
                   "seed",
                   "non_linearity",
                   "real_nystrom",
                   "repr_quality",
                   "train_size",
                   "dropout",
                   "dataset"
                   ]

    df = pd.read_csv(filepath, names=field_names)
    # df = df[df["accuracy_val"] != 'None']
    df = df.apply(pd.to_numeric, errors="ignore")

    method_names = set(df["method_name"].values)
    logger.debug("Compared network types are: {}".format(method_names))
    datasets = set(df["dataset"].values)
    logger.debug("datasets: {}".format(datasets))
    kernel_names = set(df["kernel_deepstrom"].values)
    kernel_names.remove("None")
    # kernel_names.remove("laplacian")
    logger.debug("Kernel functions are: {}".format(kernel_names))
    seed_values = set(df["seed"].values)
    logger.debug("seed values: {}".format(seed_values))
    train_sizes = set(df["train_size"])
    logger.debug("train sizes: {}".format(train_sizes))
    dropout_values = set(df["dropout"].values)
    logger.debug("dropout values: {}".format(dropout_values))
    ###########################
    # df processing
    ###########################
    for h, DATANAME in enumerate(datasets):
        means_deepstrom = {}
        df_dataname = df[df["dataset"] == DATANAME]
        for t_size in sorted(list(train_sizes)):
            df_tsize = df_dataname[df_dataname["train_size"] == t_size]
            for drop_val in dropout_values:
                df_drop = df_tsize[df_tsize["dropout"] == drop_val]

                # plot deepstrom
                # ==============
                # .copy() avoids pandas SettingWithCopyWarning when columns are reassigned below
                df_deepstrom = df_drop[df_drop["method_name"] == "deepstrom"].copy()
                df_deepstrom["subsample_size"] = df_deepstrom["subsample_size"].astype(int)
                df_deepstrom_sort = df_deepstrom.sort_values(by=["subsample_size"])

                for i, k_name in enumerate(sorted(kernel_names)):
                    df_deepstrom_sort_kernel = df_deepstrom_sort[df_deepstrom_sort["kernel_deepstrom"] == k_name]
                    # if k_name != "rbf" and k_name != "chi2_exp_cpd":  # if there is no need for sigma value
                    #     lst_df_deepstrom_kernels = [df_deepstrom_sort_kernel]
                    # else:
                    #     lst_df_deepstrom_kernels = []  # use a list for the possible multiple sigma values
                    #     sigma_values = set(df_deepstrom_sort_kernel["gamma_kernel"].values)
                    #     sigma_values = list(sigma_values)
                    #     logger.debug("sigma values: {}".format(sigma_values))
                    #     for sig_val in sigma_values:
                    #         df_deepstrom_sort_kernel_sigma = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["gamma_kernel"] == sig_val]
                    #         lst_df_deepstrom_kernels.append(df_deepstrom_sort_kernel_sigma)
                    # for j, df_deepstrom_kernel in enumerate(lst_df_deepstrom_kernels):
                    f, ax = plt.subplots()
                    # non_lin_dfs = {
                    #     "linear": df_deepstrom_kernel[df_deepstrom_kernel["non_linearity"] == "None"],
                    # }

                    # get the results of learned nystrom
                    nys_sizes = set(df_deepstrom_sort_kernel["subsample_size"].values)
                    logger.debug("Nystrom possible sizes are: {}".format(nys_sizes))
                    df_deepstrom_kernel_w = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["real_nystrom"] == False]
                    np_deepstrom_kernel_w_mean_accuracy_test = np.mean(np.array([
                        list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_deepstrom_kernel_w_std_accuracy_test = np.std(np.array([
                        list(df_deepstrom_kernel_w[df_deepstrom_kernel_w["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_param_nbr_deepstrom_kernel_w = (
                        np.square(np.array(sorted(nys_sizes))) +         # m x m
                        np.array(sorted(nys_sizes)) * output_conv_dim +  # m x d
                        np.array(sorted(nys_sizes)) * nb_classes         # m x c
                    )
                    ax.errorbar(np_param_nbr_deepstrom_kernel_w,
                                np_deepstrom_kernel_w_mean_accuracy_test,
                                np_deepstrom_kernel_w_std_accuracy_test,
                                marker=learned_nys_marker, color=linearity_color,
                                label=r" Adaptive-$\phi_{nys}$",
                                linestyle="None", capsize=3)

                    # get the results of vanilla nystrom
                    df_deepstrom_kernel_k = df_deepstrom_sort_kernel[df_deepstrom_sort_kernel["real_nystrom"]]
                    if len(df_deepstrom_kernel_k):
                        np_deepstrom_kernel_k_mean_accuracy_test = np.mean(np.array([
                            list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"])
                            for seed_v in seed_values
                        ]), axis=0)
                        np_deepstrom_kernel_k_std_accuracy_test = np.std(np.array([
                            list(df_deepstrom_kernel_k[df_deepstrom_kernel_k["seed"] == seed_v]["accuracy_test"])
                            for seed_v in seed_values
                        ]), axis=0)
                        np_param_nbr_deepstrom_kernel_k = (
                            np.square(np.array(sorted(nys_sizes))) +         # m x m
                            np.array(sorted(nys_sizes)) * output_conv_dim +  # m x d
                            np.array(sorted(nys_sizes)) * nb_classes         # m x c
                        )
                        ax.errorbar(np_param_nbr_deepstrom_kernel_k,
                                    np_deepstrom_kernel_k_mean_accuracy_test,
                                    np_deepstrom_kernel_k_std_accuracy_test,
                                    marker=real_nys_marker, color=linearity_color,
                                    label=r"$\phi_{nys}$",
                                    linestyle="None", capsize=3)

                    # plot dense
                    # ==========
                    df_dense = df_drop[df_drop["method_name"] == "dense"]
                    df_dense = df_dense[df_dense["train_size"] == t_size].copy()
                    repr_dim = set(df_dense["repr_dim"].values)
                    logger.debug("Tested representation dimensions are: {}".format(repr_dim))
                    df_dense["repr_dim"] = df_dense["repr_dim"].astype(int)
                    df_dense = df_dense.sort_values(by=["repr_dim"])
                    np_dense_mean_accuracy_test = np.mean(np.array([
                        list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    np_dense_std_accuracy_test = np.std(np.array([
                        list(df_dense[df_dense["seed"] == seed_v]["accuracy_test"])
                        for seed_v in seed_values
                    ]), axis=0)
                    ax.errorbar(
                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * output_conv_dim +
                        np.array(sorted([int(n) for n in np.unique(df_dense["repr_dim"])])) * nb_classes,
                        np_dense_mean_accuracy_test,
                        np_dense_std_accuracy_test,
                        color=dense_color,
                        marker=dense_marker,
                        label=r"$\phi_{nn}$", capsize=3, linestyle="None")

                    # plot deepfried
                    # ==============
                    df_deepfried = df_drop[df_drop["method_name"] == "deepfriedconvnet"].copy()
                    df_deepfried["nb_layer_deepfried"] = df_deepfried["nb_layer_deepfried"].astype(int)
                    df_deepfried = df_deepfried.sort_values(by=["nb_layer_deepfried"])
                    nb_layers_deepfried = set(df_deepfried["nb_layer_deepfried"].values)
                    logger.debug("nb layers deepfried: {}".format(nb_layers_deepfried))
                    np_deepfried_mean_accuracy_test = []
                    np_deepfried_std_accuracy_test = []
                    for l_nb in sorted(nb_layers_deepfried):
                        df_deepfried_stack = df_deepfried[df_deepfried["nb_layer_deepfried"] == l_nb]
                        np_deepfried_mean_accuracy_test.append(np.mean(df_deepfried_stack["accuracy_test"]))
                        np_deepfried_std_accuracy_test.append(np.std(df_deepfried_stack["accuracy_test"]))
                    ax.errorbar(
                        # sorted() keeps the x order consistent with the sorted means/stds above
                        [(output_conv_dim * 3 + output_conv_dim * nb_classes) * i for i in sorted(nb_layers_deepfried)],
                        np_deepfried_mean_accuracy_test,
                        np_deepfried_std_accuracy_test,
                        color=deepfried_color,
                        marker=deepfried_marker,
                        linestyle="None",
                        label=r" Adaptive-$\phi_{ff}$", capsize=3)

                    ax.set_ylim(min_acc, max_acc)
                    ax.set_ylabel("Accuracy")
                    ax.set_xticks([1e4, 1e5, 1e6])
                    ax.set_xlabel("symlog(# Parameters)")
                    ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=4)
                    ax.set_xticklabels([1e4, 1e5, 1e6])
                    # else:
                    #     ax.set_xticklabels([])
                    ax.set_xscale("symlog")

                    ax_twin = ax.twiny()
                    ax_twin.set_xscale("symlog")
                    ax_twin.set_xlim(ax.get_xlim())
                    ax_twin.set_xticks(np_param_nbr_deepstrom_kernel_w)
                    # if i == 0:
                    ax_twin.set_xlabel("Subsample Size")
                    ax.set_title(
                        "{} Kernel - {} - Train size: {} - Not Drop prob: {}".format(d_translate_kernel[k_name],
                                                                                     DATANAME, t_size, drop_val),
                        y=1.2)
                    ax_twin.set_xticklabels(sorted(set(df_deepstrom_kernel_w["subsample_size"])))
                    # else:
                    #     ax.set_title("{} Kernel - {} - Train size: {}".format(d_translate_kernel[k_name], DATANAME, t_size))
                    #     ax_twin.set_xticklabels([])

                    f.set_size_inches(8, 6)
                    f.tight_layout()
                    f.subplots_adjust(bottom=0.3)

                    base_out_dir = "/home/luc/PycharmProjects/deepFriedConvnets/main/experiments/graph_drawing/paper/very_big_all_datasets/images"
                    out_dir_dataset = os.path.join(base_out_dir, str(DATANAME))
                    out_name = "acc_param_tsize_{}_{}_{}".format(t_size,
                                                                 k_name,
                                                                 str(drop_val).replace(".", "-"))
                    pathlib.Path(out_dir_dataset).mkdir(parents=True, exist_ok=True)
                    out_path = os.path.join(out_dir_dataset, out_name)
                    f.savefig(out_path)
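Both scripts recompute the x-axis quantities (parameter counts for the Deepstrom, dense and DeepFriedConvnet baselines) inline each time they plot. A minimal sketch of how those counts could be factored into helpers, mirroring the constants above (d = 512 convolutional output dimension, c = 10 classes); the function names below are hypothetical and not part of this commit:

import numpy as np

def deepstrom_param_count(subsample_sizes, d=512, c=10):
    # m x m reparametrization matrix + m x d subsample + m x c classifier, as in the scripts
    m = np.asarray(sorted(subsample_sizes))
    return m ** 2 + m * d + m * c

def dense_param_count(repr_dims, d=512, c=10):
    # d x r dense layer + r x c classifier, as in the dense errorbar call
    r = np.asarray(sorted(repr_dims))
    return r * d + r * c

def deepfried_param_count(nb_layers, d=512, c=10):
    # mirrors the scripts' (3 * d + d * c) * nb_layers expression for stacked Fastfood layers
    n = np.asarray(sorted(nb_layers))
    return (3 * d + d * c) * n

# Example: x-coordinates of the Deepstrom error bars for subsample sizes 16, 64 and 256
print(deepstrom_param_count([16, 64, 256]))  # -> [  8608  37504 199168]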