Skip to content
Snippets Groups Projects
Commit 3330f3a6 authored by Luc Giffon's avatar Luc Giffon
Browse files

graph drawing classif_end_to_end_subsample_conv_hand_with_augment with mixed results

parent ba8b5d1d
Branches
No related tags found
No related merge requests found
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import pandas as pd import pandas as pd
import matplotlib import matplotlib
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pathlib import pathlib
import os import os
from skluc.main.utils import logger from skluc.main.utils import logger
matplotlib.rcParams.update({'font.size': 14}) matplotlib.rcParams.update({'font.size': 14})
pd.set_option('display.expand_frame_repr', False) pd.set_option('display.expand_frame_repr', False)
pd.set_option("display.max_columns", 100) pd.set_option("display.max_columns", 100)
``` ```
%% Output %% Output
2018-11-12 13:00:51,016 [20064] DEBUG matplotlib.backends: backend module://ipykernel.pylab.backend_inline version unknown 2018-11-21 16:40:40,775 [24133] DEBUG matplotlib.backends: backend module://ipykernel.pylab.backend_inline version unknown
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def get_sorted_acc_for_dataset(df_, dataset): def get_sorted_acc_for_dataset(df_, dataset):
df_dataset = df_[df_.dataset == dataset] df_dataset = df_[df_.dataset == dataset]
df_dataset = df_dataset.sort_values(by="test_acc", ascending=False) df_dataset = df_dataset.sort_values(by="test_acc", ascending=False)
return df_dataset return df_dataset
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def build_df(): def build_df():
filepath = os.path.join(DIRNAME_BIG, FILENAME_BIG) filepath = os.path.join(DIRNAME_BIG, FILENAME_BIG)
df = pd.read_csv(filepath) df = pd.read_csv(filepath)
df = df.apply(pd.to_numeric, errors="ignore") df = df.apply(pd.to_numeric, errors="ignore")
df = df.drop_duplicates() df = df.drop_duplicates()
col_to_delete = [ '--chi-square-PD-kernel', '--chi-square-kernel', col_to_delete = [ '--chi-square-PD-kernel', '--chi-square-kernel',
'--cifar10', '--cifar100', '--exp-chi-square-kernel', '--cifar10', '--cifar100', '--exp-chi-square-kernel',
'--intercept-constant', '--laplacian-kernel', '--linear-kernel', '--intercept-constant', '--laplacian-kernel', '--linear-kernel',
'--mnist', '--quiet', '--rbf-kernel', '--mnist', '--quiet', '--rbf-kernel',
'--sigmoid-kernel', '--stacked-kernel', '--sumed-kernel', '--svhn', '--sigmoid-kernel', '--stacked-kernel', '--sumed-kernel', '--svhn',
'--tensorboard', '--validation-size', '--tensorboard', '--validation-size',
'deepstrom', "--gamma", 'deepstrom', "--gamma",
"--non-linear", "--non-linearity", "--num-epoch", "--seed", "--non-linear", "--non-linearity", "--num-epoch", "--seed",
"--train-size", "activation_function", "--train-size", "activation_function",
"deepstrom_activation", "--real-nystrom" "deepstrom_activation", "--real-nystrom"
] ]
for c in col_to_delete: for c in col_to_delete:
df = df.drop([c], axis=1) df = df.drop([c], axis=1)
return df return df
DIRNAME_BIG = "/home/luc/Resultats/Deepstrom/november_2018/classif_end_to_end_subsample_conv_hand" DIRNAME_BIG = "/home/luc/Resultats/Deepstrom/november_2018/end_to_end_subsample_conv_hand_with_augment"
FILENAME_BIG = "gathered_results.csv" FILENAME_BIG = "gathered_results.csv"
df = build_df() df = build_df()
display(df) display(df)
``` ```
%% Output %% Output
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
get_sorted_acc_for_dataset(df, "mnist") get_sorted_acc_for_dataset(df, "mnist")
``` ```
%% Output %% Output
--batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time --batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time file_timestamp
6 64 128 None 1 mnist chi2_cpd 0.9811 0.172114 1250.739039 0.9792 0.223121 0 64 64 None 50 mnist chi2_cpd 0.8990 0.064964 26350.607944 0.8969 0.115967 1542209490
13 64 256 None 1 mnist chi2_cpd 0.9810 0.316930 1750.582823 0.9796 0.320844 13 64 256 None 50 mnist chi2_cpd 0.8930 0.159199 13036.495552 0.8919 0.163529 1542213923
2 64 64 None 1 mnist chi2_cpd 0.9808 0.067135 743.633713 0.9794 0.071094 14 64 128 None 50 mnist chi2_cpd 0.8918 0.172438 18421.614211 0.8869 0.222979 1542211961
11 64 16 None 1 mnist chi2_cpd 0.9774 0.047079 709.143471 0.9740 0.058902 17 64 512 None 50 mnist chi2_cpd 0.8886 0.561707 12977.160620 0.8778 0.575028 1542215550
5 64 512 None 1 mnist chi2_cpd 0.9756 0.251814 1553.871543 0.9680 0.282160 19 64 16 None 50 mnist chi2_cpd 0.8439 0.056367 10432.334221 0.8413 0.090400 1542205108
3 64 8 None 1 mnist chi2_cpd 0.9534 0.055799 907.542290 0.9486 0.102491 7 64 8 None 50 mnist chi2_cpd 0.8124 0.070989 13508.359984 0.8031 0.077030 1542202109
15 64 4 None 1 mnist chi2_cpd 0.5504 0.079746 947.165308 0.5396 0.085806 6 64 4 None 50 mnist chi2_cpd 0.5502 0.033261 12351.705050 0.5486 0.068031 1542199505
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
get_sorted_acc_for_dataset(df, "cifar10") get_sorted_acc_for_dataset(df, "cifar10")
``` ```
%% Output %% Output
--batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time --batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time file_timestamp
12 64 512 None 1 cifar10 chi2_cpd 0.1003 1.599234 15467.515219 0.1027 2.256960 5 64 512 None 50 cifar10 chi2_cpd 0.8712 4.787930 56967.586779 0.8787 7.037983 1542231301
7 64 8 None 1 cifar10 chi2_cpd 0.1000 3.588938 16039.019309 0.0947 5.784028 16 64 256 None 50 cifar10 chi2_cpd 0.8688 4.033883 55950.356706 0.8736 6.381946 1542231096
17 64 64 None 1 cifar10 chi2_cpd 0.1000 3.969165 18917.706158 0.0947 6.365395 10 64 128 None 50 cifar10 chi2_cpd 0.8660 4.048859 52158.011988 0.8743 6.378982 1542230395
18 64 128 None 1 cifar10 chi2_cpd 0.1000 1.360681 9864.623720 0.0947 1.949636 2 64 64 None 50 cifar10 chi2_cpd 0.8572 3.840925 35606.153394 0.8645 5.962165 1542228549
1 64 4 None 1 cifar10 chi2_cpd 0.1000 3.628797 15535.717329 0.1006 5.754008 8 64 8 None 50 cifar10 chi2_cpd 0.7714 3.931015 52235.161493 0.7742 6.280008 1542226665
8 64 256 None 1 cifar10 chi2_cpd 0.1000 1.338811 25427.858178 0.1006 1.951765 9 64 4 None 50 cifar10 chi2_cpd 0.4169 3.507496 50602.315975 0.4124 5.679052 1542215637
16 64 16 None 1 cifar10 chi2_cpd 0.1000 1.098922 6856.143636 0.1006 1.634924 20 64 16 None 50 cifar10 chi2_cpd 0.1140 1.127676 32099.498744 0.1219 1.698693 1542226974
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
get_sorted_acc_for_dataset(df, "svhn") get_sorted_acc_for_dataset(df, "svhn")
``` ```
%% Output %% Output
--batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time --batch-size --nys-size --out-dim --subs-every dataset kernel test_acc test_eval_time training_time val_acc val_eval_time file_timestamp
0 64 64 None 1 svhn chi2_cpd 0.196694 3.208492 13886.173297 0.1881 1.850464 11 64 128 None 50 svhn chi2_cpd 0.949639 9.729126 74122.297888 0.9451 5.790046 1542264179
4 64 16 None 1 svhn chi2_cpd 0.196694 2.950274 11086.446670 0.1881 1.672408 4 64 512 None 50 svhn chi2_cpd 0.949370 4.025452 75948.217098 0.9427 2.080920 1542266919
9 64 128 None 1 svhn chi2_cpd 0.196694 3.169210 16867.399688 0.1881 1.771410 1 64 8 None 50 svhn chi2_cpd 0.196694 3.221060 153978.042774 0.1881 1.785691 1542242240
10 64 512 None 1 svhn chi2_cpd 0.196694 12.052618 61869.212071 0.1881 6.758391 3 64 256 None 50 svhn chi2_cpd 0.196694 10.474136 116107.833837 0.1881 6.086989 1542266269
14 64 8 None 1 svhn chi2_cpd 0.196694 3.106948 10786.248278 0.1881 1.732185 12 64 4 None 50 svhn chi2_cpd 0.196694 3.032919 153685.657897 0.1881 1.741903 1542235855
19 64 256 None 1 svhn chi2_cpd 0.196694 11.152896 45104.915969 0.1881 6.566199 15 64 64 None 50 svhn chi2_cpd 0.196694 3.062441 72432.295633 0.1881 2.894550 1542259094
20 64 4 None 1 svhn chi2_cpd 0.196694 10.198356 24755.796742 0.1881 6.264580 18 64 16 None 50 svhn chi2_cpd 0.196694 2.846475 72815.976671 0.1881 1.661851 1542255462
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
# coding: utf-8
# In[1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import os
from skluc.main.utils import logger
# Use a 14pt font for every figure produced by this script.
matplotlib.rcParams.update({'font.size': 14})
# Print wide DataFrames on a single line instead of wrapping their columns.
pd.set_option('display.expand_frame_repr', False)
# In[7]:
def _to_numeric_if_possible(column):
    """Return *column* converted to a numeric dtype, or unchanged if it cannot be."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Non-numeric column (e.g. dataset or kernel names): keep it as it is.
        return column


def build_df(dirname, filename):
    """Load a gathered-results CSV and remove the columns not needed for plotting.

    The file ``dirname/filename`` is read with pandas, every column that can be
    parsed as numbers is converted to a numeric dtype, exact duplicate rows are
    removed, and the columns listed in ``col_to_delete`` are dropped.

    Args:
        dirname: Directory containing the CSV file.
        filename: Name of the CSV file inside ``dirname``.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    filepath = os.path.join(dirname, filename)
    df = pd.read_csv(filepath)
    # ``pd.to_numeric(..., errors="ignore")`` is deprecated in recent pandas
    # releases; the helper reproduces its "leave the column alone" behaviour.
    df = df.apply(_to_numeric_if_possible)
    df = df.drop_duplicates()
    col_to_delete = ['--batch-size', '--chi-square-PD-kernel', '--chi-square-kernel',
                     '--cifar10', '--cifar100', '--exp-chi-square-kernel',
                     '--intercept-constant', '--laplacian-kernel', '--linear-kernel',
                     '--mnist', '--quiet', '--rbf-kernel',
                     '--sigmoid-kernel', '--stacked-kernel', '--sumed-kernel', '--svhn',
                     '--tensorboard', '--validation-size',
                     'deepfriedconvnet', 'deepstrom', 'dense', "--gamma", "--nb-stack",
                     "--non-linear", "--non-linearity", "--num-epoch", "--seed",
                     "--train-size", "--second-layer-size", "activation_function",
                     "deepstrom_activation", "--real-fastfood", "--real-nystrom"
                     ]
    for c in col_to_delete:
        try:
            df = df.drop([c], axis=1)
        except KeyError as e:
            # Not every results file contains every column: report and go on.
            print("pass", e)
    return df
# In[8]:
# First results file: loaded into df_no_conv_hand.
DIRNAME = "/home/luc/Resultats/Deepstrom/november_2018/end_to_end_with_augment"
FILENAME = "gathered_results.csv"
df_no_conv_hand = build_df(DIRNAME, FILENAME)
# In[9]:
# Second results file: DIRNAME / FILENAME are deliberately re-assigned here.
DIRNAME = "/home/luc/Resultats/Deepstrom/november_2018/end_to_end_subsample_conv_hand_with_augment"
FILENAME = "gathered_results.csv"
df_conv_hand = build_df(DIRNAME, FILENAME)
# Stack both result sets row-wise; sort=True orders the union of their columns.
df = pd.concat([df_conv_hand, df_no_conv_hand], axis=0, ignore_index=True, sort=True)
# Every "--subs-every" value different from 50 (missing ones included) becomes 1.
df.loc[df["--subs-every"] != 50., "--subs-every"] = 1
# Rows without a "network" value are labelled "deepstrom".
df.loc[df["network"].isnull(), "network"] = "deepstrom"
# In[40]:
# Number of target classes for each dataset name.
nb_classes_datasets = dict(svhn=10, cifar10=10, mnist=10, cifar100=100)

# Per-dataset feature count used as the dense layer's input dimension when
# counting its parameters.
nb_feature_convs = dict(svhn=512, cifar10=512, mnist=16, cifar100=512)

# Legend suffix associated with each "--subs-every" value.
conv_status = {1: "no_conv_hand", 50: "conv_hand"}

# Bounds of the accuracy (y) axis.
min_acc, max_acc = 0, 1
# In[41]:
def post_processing_figures(f, ax, nbparamdeepstrom, subsample_sizes):
    """Decorate an accuracy figure and save it next to this script.

    Sets axis limits, labels, legend and title on ``ax``, adds a twin x axis
    whose ticks sit at the Deepstrom parameter counts and are labelled with the
    matching subsample sizes, then saves the figure in ``<script>/images/``,
    where ``<script>`` is this file's absolute path without its extension.

    Reads the module-level names ``min_acc``, ``max_acc``, ``DATANAME`` and
    ``logger``.

    Args:
        f: Figure to resize and save.
        ax: Axes of ``f`` holding the plotted curves.
        nbparamdeepstrom: Parameter counts used as tick positions on the twin
            axis (sorted before use).
        subsample_sizes: Subsample sizes used as tick labels on the twin axis
            (sorted before use).
    """
    ax.set_ylim(min_acc, max_acc)
    ax.set_ylabel("Accuracy")
    ax.set_xticks([1e4, 1e5, 1e6])
    ax.set_xlabel("# Learnable Parameters")
    ax.legend(bbox_to_anchor=(0.5, -0.20), loc="upper center", ncol=2)
    ax.set_xticklabels([1e4, 1e5, 1e6])
    # NOTE(review): the scale is changed after the ticks were set; this may
    # reset the tick locator/formatter chosen above - confirm the rendering.
    ax.set_xscale("symlog")

    ax_twin = ax.twiny()
    ax_twin.set_xscale("symlog")
    ax_twin.set_xlim(ax.get_xlim())
    ax_twin.set_xticks(sorted(nbparamdeepstrom))
    ax_twin.set_xticklabels(sorted(subsample_sizes))
    ax_twin.set_xlabel("Subsample Size")

    ax.set_title("{}".format(DATANAME), y=1.2)
    f.set_size_inches(8, 6)
    f.tight_layout()
    f.subplots_adjust(bottom=0.3)

    out_name = "end_to_end_{}".format(DATANAME)
    # Strip only the file extension. Splitting the raw path on "." broke for
    # "./script.py" (empty prefix) and for any directory containing a dot.
    base_out_dir = os.path.splitext(os.path.abspath(__file__))[0]
    base_out_dir_path = pathlib.Path(base_out_dir) / "images"
    base_out_dir_path.mkdir(parents=True, exist_ok=True)
    out_path = base_out_dir_path / out_name
    logger.debug(out_path)
    f.savefig(out_path)
# In[42]:
# Distinct values of the experiment parameters found in the merged results.
method_names = set(df["network"].values)
kernel_names = set(df["kernel"].values)
# discard() instead of remove(): the "None" placeholder is not guaranteed to be
# present in every results file, and remove() would raise KeyError.
kernel_names.discard("None")
repr_dim = set(df["--out-dim"].values)
repr_dim.discard("None")  # dtype: str
nys_size = set(df["--nys-size"].values)
nys_size.discard("None")
datasets = set(df["dataset"])
subs_everies = set(df["--subs-every"].values)

logger.debug("Nystrom possible sizes are: {}".format(nys_size))
logger.debug("Kernel functions are: {}".format(kernel_names))
logger.debug("Compared network types are: {}".format(method_names))
logger.debug("Tested representation dimension are: {}".format(repr_dim))
logger.debug(f"Tested sub every: {subs_everies}")

# One figure per dataset: accuracy against the number of learnable parameters.
# DATANAME stays a module-level name because post_processing_figures reads it.
for DATANAME in datasets:
    df_data = df[df["dataset"] == DATANAME]
    nb_classes_dataset = nb_classes_datasets[DATANAME]
    nb_feature_conv = nb_feature_convs[DATANAME]

    f, ax = plt.subplots()

    # Every Nystrom size actually plotted for this dataset. Collected across
    # all curves so the twin axis does not depend on which (kernel, subs-every)
    # pair happens to be iterated last.
    plotted_sizes = set()

    for k_name in kernel_names:
        df_kernel = df_data[df_data["kernel"] == k_name]
        for sub_every in subs_everies:
            df_sub_every = df_kernel[df_kernel["--subs-every"] == sub_every]
            if df_sub_every.empty:
                # No run for this combination: avoid an empty legend entry.
                continue

            accuracies_kernel = df_sub_every["test_acc"]
            subsample_sizes_kernel = df_sub_every["--nys-size"].astype(int)
            np_param = (np.square(subsample_sizes_kernel) +  # m x m
                        subsample_sizes_kernel * nb_classes_dataset)  # m x c

            sorted_idx = np.argsort(np_param.values)
            xx = np_param.values[sorted_idx]
            yy = accuracies_kernel.values[sorted_idx].astype(float)
            ax.plot(xx, yy, marker="x", label=f"Deepstrom {k_name} - {conv_status[int(sub_every)]}")
            plotted_sizes.update(subsample_sizes_kernel.tolist())

    df_dense = df_data[df_data["network"] == "dense"]
    if not df_dense.empty:
        accuracies_dense = df_dense["test_acc"]
        out_dim_dense = df_dense["--out-dim"].astype(int)
        np_param_dense = (nb_feature_conv * out_dim_dense +  # d x D
                          out_dim_dense * nb_classes_dataset)  # D x c
        sorted_idx_dense = np.argsort(np_param_dense.values)
        xx = np_param_dense.values[sorted_idx_dense]
        yy = accuracies_dense.values[sorted_idx_dense].astype(float)
        ax.plot(xx, yy, marker="o", label="Dense")

    # Twin-axis ticks: one per distinct subsample size m, placed at the
    # matching Deepstrom parameter count m*m + m*c.
    twin_sizes = sorted(plotted_sizes)
    twin_params = [m * m + m * nb_classes_dataset for m in twin_sizes]
    post_processing_figures(f, ax, twin_params, twin_sizes)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment