Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Franck Dary
macaon
Commits
3c3acb33
Commit
3c3acb33
authored
Apr 30, 2021
by
Franck Dary
Browse files
Added option memcheck to train
parent
e496576e
Changes
5
Hide whitespace changes
Inline
Side-by-side
common/include/util.hpp
View file @
3c3acb33
...
...
@@ -53,6 +53,7 @@ bool isUrl(const std::string & s);
bool
isNumber
(
const
std
::
string
&
s
);
std
::
string
getTime
();
std
::
string
getMemUsage
();
long
float2long
(
float
f
);
float
long2float
(
long
l
);
...
...
common/src/util.cpp
View file @
3c3acb33
...
...
@@ -2,6 +2,9 @@
#include
"utf8.hpp"
#include
<ctime>
#include
<algorithm>
#include
<iostream>
#include
<fstream>
#include
<unistd.h>
#include
"upper2lower"
float
util
::
long2float
(
long
l
)
...
...
@@ -236,6 +239,24 @@ std::string util::getTime()
return
std
::
string
(
buffer
);
}
/// @brief Describe the memory currently used by this process.
///
/// Parses /proc/self/stat and formats the virtual memory size and the
/// resident set size with two decimals each.
/// Per proc(5), vsize (field 23) is expressed in bytes and rss (field 24) in pages.
///
/// @return A string of the form "Virtual:<x>Go Physical:<y>Go". Both values are
///         reported as 0.00 when /proc/self/stat cannot be opened or parsed.
std::string util::getMemUsage()
{
  // Fields 3 to 22 sit between the command name and vsize.
  constexpr int nbFieldsAfterComm = 20;

  // Zero-initialised on purpose: when the file cannot be opened the stream is
  // already in a failed state, the extractions below leave their targets
  // untouched, and the values would otherwise be read while indeterminate.
  unsigned long vsize = 0;
  long rss = 0;

  std::ifstream ifs("/proc/self/stat", std::ios_base::in);
  std::string ignore;

  // Field 1 : pid.
  ifs >> ignore;

  // Field 2 : "(comm)". The command name may contain spaces, so tokens are
  // consumed until one ends with the closing parenthesis.
  while (ifs >> ignore && ignore.back() != ')')
  {
  }

  for (int i = 0; i < nbFieldsAfterComm; ++i)
    ifs >> ignore;

  ifs >> vsize >> rss;

  // Any failure along the way (missing file, truncated content) yields zeros
  // rather than a partially parsed pair of values.
  if (!ifs)
  {
    vsize = 0;
    rss = 0;
  }

  long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages

  // NOTE(review): the conversion divides by 1024 and then by 10^6, so the
  // figure is neither exactly GB nor GiB. Kept as is to preserve the output.
  float vm_usage = vsize / 1024.0;
  float resident_set = rss * page_size_kb;

  return fmt::format("Virtual:{:.2f}Go Physical:{:.2f}Go", vm_usage / 1000000.0, resident_set / 1000000.0);
}
bool
util
::
choiceWithProbability
(
float
probability
)
{
int
maxVal
=
100000
;
...
...
trainer/include/Trainer.hpp
View file @
3c3acb33
...
...
@@ -55,13 +55,13 @@ class Trainer
private
:
void
extractExamples
(
std
::
vector
<
SubConfig
>
&
configs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
);
void
extractExamples
(
std
::
vector
<
SubConfig
>
&
configs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
,
bool
memcheck
);
float
processDataset
(
DataLoader
&
loader
,
bool
train
,
bool
printAdvancement
,
int
nbExamples
);
public
:
Trainer
(
ReadingMachine
&
machine
,
int
batchSize
);
void
createDataset
(
std
::
vector
<
BaseConfig
>
&
goldConfigs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
);
void
createDataset
(
std
::
vector
<
BaseConfig
>
&
goldConfigs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
,
bool
memcheck
);
void
extractActionSequence
(
BaseConfig
&
config
);
void
makeDataLoader
(
std
::
filesystem
::
path
dir
);
void
makeDevDataLoader
(
std
::
filesystem
::
path
dir
);
...
...
trainer/src/MacaonTrain.cpp
View file @
3c3acb33
...
...
@@ -22,6 +22,7 @@ po::options_description MacaonTrain::getOptionsDescription()
opt
.
add_options
()
(
"debug,d"
,
"Print debuging infos on stderr"
)
(
"silent"
,
"Don't print speed and progress"
)
(
"memcheck"
,
"Regularly print memory usage on stderr"
)
(
"devScore"
,
"Compute score on dev instead of loss (slower)"
)
(
"mcd"
,
po
::
value
<
std
::
string
>
()
->
default_value
(
"ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL"
),
"Comma separated column names that describes the input/output format"
)
...
...
@@ -133,6 +134,7 @@ int MacaonTrain::main()
auto
nbEpoch
=
variables
[
"nbEpochs"
].
as
<
int
>
();
auto
batchSize
=
variables
[
"batchSize"
].
as
<
int
>
();
bool
debug
=
variables
.
count
(
"debug"
)
==
0
?
false
:
true
;
bool
memcheck
=
variables
.
count
(
"memcheck"
)
==
0
?
false
:
true
;
bool
printAdvancement
=
!
debug
&&
variables
.
count
(
"silent"
)
==
0
?
true
:
false
;
bool
computeDevScore
=
variables
.
count
(
"devScore"
)
==
0
?
false
:
true
;
auto
machineContent
=
variables
[
"machine"
].
as
<
std
::
string
>
();
...
...
@@ -267,11 +269,11 @@ int MacaonTrain::main()
if
(
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractGold
)
or
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
))
{
machine
.
setDictsState
(
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
)
?
Dict
::
State
::
Closed
:
Dict
::
State
::
Open
);
trainer
.
createDataset
(
goldConfigs
,
debug
,
modelPath
/
"examples/train"
,
currentEpoch
,
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
),
explorationThreshold
);
trainer
.
createDataset
(
goldConfigs
,
debug
,
modelPath
/
"examples/train"
,
currentEpoch
,
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
),
explorationThreshold
,
memcheck
);
if
(
!
computeDevScore
)
{
machine
.
setDictsState
(
Dict
::
State
::
Closed
);
trainer
.
createDataset
(
devGoldConfigs
,
debug
,
modelPath
/
"examples/dev"
,
currentEpoch
,
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
),
explorationThreshold
);
trainer
.
createDataset
(
devGoldConfigs
,
debug
,
modelPath
/
"examples/dev"
,
currentEpoch
,
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ExtractDynamic
),
explorationThreshold
,
memcheck
);
}
}
if
(
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ResetParameters
)
or
trainStrategy
[
currentEpoch
].
count
(
Trainer
::
TrainAction
::
ResetOptimizer
))
...
...
@@ -392,6 +394,8 @@ int MacaonTrain::main()
std
::
FILE
*
f
=
std
::
fopen
(
trainInfos
.
c_str
(),
"a"
);
fmt
::
print
(
f
,
"{}
\t
{}
\n
"
,
iterStr
,
devScoreMean
);
std
::
fclose
(
f
);
if
(
memcheck
)
fmt
::
print
(
stderr
,
"[{}] Memory : {}
\n
"
,
util
::
getTime
(),
util
::
getMemUsage
());
}
}
...
...
trainer/src/Trainer.cpp
View file @
3c3acb33
...
...
@@ -18,7 +18,7 @@ void Trainer::makeDevDataLoader(std::filesystem::path dir)
devDataLoader
=
torch
::
data
::
make_data_loader
(
*
devDataset
,
torch
::
data
::
DataLoaderOptions
(
batchSize
).
workers
(
0
).
max_jobs
(
0
));
}
void
Trainer
::
createDataset
(
std
::
vector
<
BaseConfig
>
&
goldConfigs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
)
void
Trainer
::
createDataset
(
std
::
vector
<
BaseConfig
>
&
goldConfigs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
,
bool
memcheck
)
{
std
::
vector
<
SubConfig
>
configs
;
for
(
auto
&
goldConfig
:
goldConfigs
)
...
...
@@ -26,12 +26,12 @@ void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, s
machine
.
trainMode
(
false
);
extractExamples
(
configs
,
debug
,
dir
,
epoch
,
dynamicOracle
,
explorationThreshold
);
extractExamples
(
configs
,
debug
,
dir
,
epoch
,
dynamicOracle
,
explorationThreshold
,
memcheck
);
machine
.
saveDicts
();
}
void
Trainer
::
extractExamples
(
std
::
vector
<
SubConfig
>
&
configs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
)
void
Trainer
::
extractExamples
(
std
::
vector
<
SubConfig
>
&
configs
,
bool
debug
,
std
::
filesystem
::
path
dir
,
int
epoch
,
bool
dynamicOracle
,
float
explorationThreshold
,
bool
memcheck
)
{
torch
::
AutoGradMode
useGrad
(
false
);
...
...
@@ -50,10 +50,13 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
std
::
atomic
<
int
>
totalNbExamples
=
0
;
if
(
memcheck
)
fmt
::
print
(
stderr
,
"[{}] Memory : {}
\n
"
,
util
::
getTime
(),
util
::
getMemUsage
());
NeuralNetworkImpl
::
setDevice
(
torch
::
kCPU
);
machine
.
to
(
NeuralNetworkImpl
::
getDevice
());
std
::
for_each
(
std
::
execution
::
par
,
configs
.
begin
(),
configs
.
end
(),
[
this
,
maxNbExamplesPerFile
,
&
examplesPerState
,
&
totalNbExamples
,
debug
,
dynamicOracle
,
explorationThreshold
,
dir
,
epoch
,
&
examplesMutex
](
SubConfig
&
config
)
[
this
,
maxNbExamplesPerFile
,
&
examplesPerState
,
&
totalNbExamples
,
debug
,
memcheck
,
dynamicOracle
,
explorationThreshold
,
dir
,
epoch
,
&
examplesMutex
](
SubConfig
&
config
)
{
config
.
addPredicted
(
machine
.
getPredicted
());
config
.
setStrategy
(
machine
.
getStrategyDefinition
());
...
...
@@ -189,7 +192,11 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
if
(
config
.
needsUpdate
())
config
.
update
();
}
// End while true
if
(
memcheck
)
fmt
::
print
(
stderr
,
"[{}] Memory : {}
\n
"
,
util
::
getTime
(),
util
::
getMemUsage
());
});
// End for on configs
for
(
auto
&
it
:
examplesPerState
)
...
...
@@ -203,6 +210,8 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
util
::
myThrow
(
fmt
::
format
(
"could not create file '{}'"
,
currentEpochAllExtractedFile
.
c_str
()));
std
::
fclose
(
f
);
if
(
memcheck
)
fmt
::
print
(
stderr
,
"[{}] Memory : {}
\n
"
,
util
::
getTime
(),
util
::
getMemUsage
());
fmt
::
print
(
stderr
,
"[{}] Extracted {} examples
\n
"
,
util
::
getTime
(),
util
::
int2HumanStr
(
totalNbExamples
));
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment