Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
deepFriedConvnet
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Luc Giffon
deepFriedConvnet
Commits
64ca4df5
Commit
64ca4df5
authored
7 years ago
by
Luc Giffon
Browse files
Options
Downloads
Patches
Plain Diff
build stacked fastfood - need to remove diagonal fastfood and do the stacks implicitly
parent
dc036659
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
main/convnet_random.py
+36
-25
36 additions, 25 deletions
main/convnet_random.py
with
36 additions
and
25 deletions
main/convnet_random.py
+
36
−
25
View file @
64ca4df5
...
...
@@ -42,9 +42,9 @@ def convolution(input):
with
tf
.
name_scope
(
"
conv_pool_1
"
):
# 32 is the number of filters we'll use, i.e. the number of different
# shapes this layer is able to recognize
W_conv1
=
weight_variable
([
5
,
5
,
1
,
3
2
])
W_conv1
=
weight_variable
([
5
,
5
,
1
,
2
0
])
tf
.
summary
.
histogram
(
"
weights conv1
"
,
W_conv1
)
b_conv1
=
bias_variable
([
3
2
])
b_conv1
=
bias_variable
([
2
0
])
tf
.
summary
.
histogram
(
"
biases conv1
"
,
b_conv1
)
# -1 is here to keep the total size constant (784)
h_conv1
=
tf
.
nn
.
relu
(
conv2d
(
input
,
W_conv1
)
+
b_conv1
)
...
...
@@ -52,9 +52,9 @@ def convolution(input):
h_pool1
=
max_pool_2x2
(
h_conv1
)
with
tf
.
name_scope
(
"
conv_pool_2
"
):
W_conv2
=
weight_variable
([
5
,
5
,
32
,
64
])
W_conv2
=
weight_variable
([
5
,
5
,
20
,
50
])
tf
.
summary
.
histogram
(
"
weights conv2
"
,
W_conv2
)
b_conv2
=
bias_variable
([
64
])
b_conv2
=
bias_variable
([
50
])
tf
.
summary
.
histogram
(
"
biases conv2
"
,
b_conv2
)
h_conv2
=
tf
.
nn
.
relu
(
conv2d
(
h_pool1
,
W_conv2
)
+
b_conv2
)
tf
.
summary
.
histogram
(
"
act conv2
"
,
h_conv2
)
...
...
@@ -77,7 +77,7 @@ def random_biases(shape):
# --- Fast Food Naive --- #
def
G_variable
(
d
,
diag
=
True
):
def
G_variable
(
d
,
diag
=
True
,
trainable
=
False
):
"""
Return a Gaussian Random diagonal matrix converted into Tensorflow Variable.
...
...
@@ -92,11 +92,10 @@ def G_variable(d, diag=True):
else
:
G
=
np
.
random
.
normal
(
size
=
d
).
astype
(
np
.
float32
)
G_norm
=
np
.
linalg
.
norm
(
G
,
ord
=
2
)
print
(
"
Norm of G is: {}
"
.
format
(
G_norm
))
return
tf
.
Variable
(
G
,
name
=
"
G
"
,
trainable
=
False
),
G_norm
return
tf
.
Variable
(
G
,
name
=
"
G
"
,
trainable
=
trainable
),
G_norm
def
B_variable
(
d
,
diag
=
True
):
def
B_variable
(
d
,
diag
=
True
,
trainable
=
False
):
"""
Return a random diagonal matrix of -1 and 1 picked uniformly into Tensorflow Variable.
...
...
@@ -108,7 +107,7 @@ def B_variable(d, diag=True):
B
=
np
.
diag
(
np
.
random
.
choice
([
-
1
,
1
],
size
=
d
,
replace
=
True
)).
astype
(
np
.
float32
)
else
:
B
=
np
.
random
.
choice
([
-
1
,
1
],
size
=
d
,
replace
=
True
).
astype
(
np
.
float32
)
return
tf
.
Variable
(
B
,
name
=
"
B
"
,
trainable
=
Fals
e
)
return
tf
.
Variable
(
B
,
name
=
"
B
"
,
trainable
=
trainabl
e
)
def
P_variable
(
d
):
...
...
@@ -138,7 +137,7 @@ def H_variable(d):
return
tf
.
Variable
(
H
,
name
=
"
H
"
,
trainable
=
False
)
def
S_variable
(
d
,
G_norm
,
diag
=
True
):
def
S_variable
(
d
,
G_norm
,
diag
=
True
,
trainable
=
False
):
"""
Return a scaling diagonal matrix of random values picked from a chi distribution.
...
...
@@ -154,7 +153,7 @@ def S_variable(d, G_norm, diag=True):
S
=
np
.
diag
((
1
/
G_norm
)
*
scipy
.
stats
.
chi
.
rvs
(
d
,
size
=
d
)).
astype
(
np
.
float32
)
else
:
S
=
(
1
/
G_norm
)
*
scipy
.
stats
.
chi
.
rvs
(
d
,
size
=
d
).
astype
(
np
.
float32
)
return
tf
.
Variable
(
S
,
name
=
"
S
"
,
trainable
=
Fals
e
)
return
tf
.
Variable
(
S
,
name
=
"
S
"
,
trainable
=
trainabl
e
)
# --- Hadamard utils --- #
...
...
@@ -198,9 +197,8 @@ def random_features(conv_out, sigma):
return
h1_final
def
fast_food
(
conv_out
,
sigma
,
diag
=
True
,
trainable
=
False
):
# todo: use the trainable parameter
with
tf
.
name_scope
(
"
fastfood
"
):
def
fast_food
(
conv_out
,
sigma
,
nbr_stack
=
1
,
diag
=
True
,
trainable
=
False
,
name
=
"
fastfood
"
):
with
tf
.
name_scope
(
name
+
"
_diag=
"
+
str
(
diag
)
+
"
_sigma=
"
+
str
(
sigma
)):
init_dim
=
np
.
prod
([
s
.
value
for
s
in
conv_out
.
shape
if
s
.
value
is
not
None
])
final_dim
=
int
(
dimensionality_constraints
(
init_dim
))
padding
=
final_dim
-
init_dim
...
...
@@ -208,15 +206,15 @@ def fast_food(conv_out, sigma, diag=True, trainable=False):
paddings
=
tf
.
constant
([[
0
,
0
],
[
0
,
padding
]])
conv_out2
=
tf
.
pad
(
conv_out2
,
paddings
,
"
CONSTANT
"
)
G
,
G_norm
=
G_variable
(
final_dim
,
diag
=
diag
)
G
,
G_norm
=
G_variable
(
final_dim
,
diag
=
diag
,
trainable
=
trainable
)
tf
.
summary
.
histogram
(
"
weights G
"
,
G
)
B
=
B_variable
(
final_dim
,
diag
=
diag
)
B
=
B_variable
(
final_dim
,
diag
=
diag
,
trainable
=
trainable
)
tf
.
summary
.
histogram
(
"
weights B
"
,
B
)
H
=
H_variable
(
final_dim
)
tf
.
summary
.
histogram
(
"
weights H
"
,
H
)
P
=
P_variable
(
final_dim
)
tf
.
summary
.
histogram
(
"
weights P
"
,
P
)
S
=
S_variable
(
final_dim
,
G_norm
,
diag
=
diag
)
S
=
S_variable
(
final_dim
,
G_norm
,
diag
=
diag
,
trainable
=
trainable
)
tf
.
summary
.
histogram
(
"
weights S
"
,
S
)
if
diag
:
...
...
@@ -257,8 +255,17 @@ def fully_connected(conv_out):
return
h_fc1
def stacked_fastfood(input_, nbr, sigma, diag=False, trainable=False):
    """Build several fast_food layers on one input and concatenate them.

    `fast_food` is called `nbr` times with the same `input_`, `sigma`, `diag`
    and `trainable` arguments; only the `name` differs ("fastfood0",
    "fastfood1", ...). The resulting tensors are joined with `tf.concat`
    along axis 1.

    :param input_: tensor forwarded unchanged to every `fast_food` call.
    :param nbr: number of `fast_food` layers to build.
    :param sigma: forwarded to `fast_food`.
    :param diag: forwarded to `fast_food`.
    :param trainable: forwarded to `fast_food`.
    :return: the concatenation (axis 1) of the `nbr` fast_food outputs.
    """
    # NOTE(review): presumably every fast_food output is (batch, features) so
    # that axis 1 is the feature axis -- confirm against fast_food.
    branches = [
        fast_food(
            input_,
            sigma,
            diag=diag,
            trainable=trainable,
            name="fastfood{}".format(idx),
        )
        for idx in range(nbr)
    ]
    return tf.concat(branches, axis=1)
if
__name__
==
'
__main__
'
:
SIGMA
=
100
.0
SIGMA
=
5
.0
print
(
"
Sigma = {}
"
.
format
(
SIGMA
))
with
tf
.
Graph
().
as_default
():
...
...
@@ -274,9 +281,14 @@ if __name__ == '__main__':
h_conv
=
convolution
(
x_image
)
# h_conv = x
# out_fc = fully_connected(h_conv) # 95% accuracy
# out_fc = fast_food(h_conv, SIGMA) # 83% accuracy (conv) | 56% accuracy (noconv)
# out_fc = fast_food(h_conv, SIGMA, diag=False) # 84% accuracy (conv) | 59% accuracy (noconv)
out_fc
=
random_features
(
h_conv
,
SIGMA
)
# 82% accuracy (conv) | 47% accuracy (noconv)
# out_fc = tf.nn.relu(fast_food(h_conv, SIGMA)) # 83% accuracy (conv) | 56% accuracy (noconv)
# out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False)) # 84% accuracy (conv) | 59% accuracy (noconv)
# out_fc = tf.nn.relu(fast_food(h_conv, SIGMA, diag=False, trainable=True)) # 84% accuracy (conv) | 59% accuracy (noconv)
# todo: write a less naive implementation: blocks should only be needed in tf when diagonal matrices are used,
# otherwise only several rows of the Hadamard matrix are needed
out_fc
=
tf
.
nn
.
relu
(
stacked_fastfood
(
h_conv
,
2
,
SIGMA
,
diag
=
False
,
trainable
=
True
))
# 84% accuracy (conv) | 59% accuracy (noconv)
# out_fc = fast_food(h_conv, SIGMA, diag=True, trainable=True) # 84% accuracy (conv) | 59% accuracy (noconv)
# out_fc = random_features(h_conv, SIGMA) # 82% accuracy (conv) | 47% accuracy (noconv)
# classification
with
tf
.
name_scope
(
"
fc_2
"
):
...
...
@@ -315,15 +327,14 @@ if __name__ == '__main__':
# Create a session for running Ops on the Graph.
sess
=
tf
.
Session
()
# Instantiate a SummaryWriter to output summaries and the Graph.
summary_writer
=
tf
.
summary
.
FileWriter
(
"
results_deepfried
"
)
summary_writer
=
tf
.
summary
.
FileWriter
(
"
results_deepfried
_stacked
"
)
summary_writer
.
add_graph
(
sess
.
graph
)
# Initialize all Variable objects
sess
.
run
(
init
)
# actual learning
started
=
t
.
time
()
for
i
in
range
(
5
00
):
batch
=
mnist
.
train
.
next_batch
(
50
)
for
i
in
range
(
20
00
):
batch
=
mnist
.
train
.
next_batch
(
64
)
feed_dict
=
{
x
:
batch
[
0
],
y_
:
batch
[
1
],
keep_prob
:
0.5
}
# the _ captures the return value of "train_optimizer", which must be called
# to compute the gradient but whose output is of no interest to us
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment