Commit 3c3acb33 authored by Franck Dary's avatar Franck Dary
Browse files

Added option memcheck to train

parent e496576e
......@@ -53,6 +53,7 @@ bool isUrl(const std::string & s);
bool isNumber(const std::string & s);
std::string getTime();
std::string getMemUsage();
long float2long(float f);
float long2float(long l);
......
......@@ -2,6 +2,9 @@
#include "utf8.hpp"
#include <ctime>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <unistd.h>
#include "upper2lower"
float util::long2float(long l)
......@@ -236,6 +239,24 @@ std::string util::getTime()
return std::string(buffer);
}
// Returns a human-readable summary of the current process' memory usage
// (virtual size and resident set), read from /proc/self/stat.
// Linux-specific: on read failure both figures are reported as 0 instead of
// printing uninitialized garbage.
std::string util::getMemUsage()
{
  // Zero-initialize so a failed open/read yields "0.00" rather than UB.
  unsigned long vsize = 0;
  long rss = 0;

  std::ifstream ifs("/proc/self/stat", std::ios_base::in);
  if (ifs)
  {
    // vsize and rss are the 23rd and 24th whitespace-separated fields of
    // /proc/self/stat; skip the 22 fields that precede them.
    std::string ignore;
    for (int field = 0; field < 22 && ifs; ++field)
      ifs >> ignore;
    if (!(ifs >> vsize >> rss))
    {
      vsize = 0;
      rss = 0;
    }
  }

  long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
  float vm_usage = vsize / 1024.0;         // virtual size, in KB
  float resident_set = rss * page_size_kb; // rss is in pages; convert to KB

  return fmt::format("Virtual:{:.2f}Go Physical:{:.2f}Go", vm_usage/1000000.0, resident_set/1000000.0);
}
bool util::choiceWithProbability(float probability)
{
int maxVal = 100000;
......
......@@ -55,13 +55,13 @@ class Trainer
private :
void extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold);
void extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck);
float processDataset(DataLoader & loader, bool train, bool printAdvancement, int nbExamples);
public :
Trainer(ReadingMachine & machine, int batchSize);
void createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold);
void createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck);
void extractActionSequence(BaseConfig & config);
void makeDataLoader(std::filesystem::path dir);
void makeDevDataLoader(std::filesystem::path dir);
......
......@@ -22,6 +22,7 @@ po::options_description MacaonTrain::getOptionsDescription()
opt.add_options()
("debug,d", "Print debuging infos on stderr")
("silent", "Don't print speed and progress")
("memcheck", "Regularly print memory usage on stderr")
("devScore", "Compute score on dev instead of loss (slower)")
("mcd", po::value<std::string>()->default_value("ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL"),
"Comma separated column names that describes the input/output format")
......@@ -133,6 +134,7 @@ int MacaonTrain::main()
auto nbEpoch = variables["nbEpochs"].as<int>();
auto batchSize = variables["batchSize"].as<int>();
bool debug = variables.count("debug") == 0 ? false : true;
bool memcheck = variables.count("memcheck") == 0 ? false : true;
bool printAdvancement = !debug && variables.count("silent") == 0 ? true : false;
bool computeDevScore = variables.count("devScore") == 0 ? false : true;
auto machineContent = variables["machine"].as<std::string>();
......@@ -267,11 +269,11 @@ int MacaonTrain::main()
if (trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractGold) or trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic))
{
machine.setDictsState(trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic) ? Dict::State::Closed : Dict::State::Open);
trainer.createDataset(goldConfigs, debug, modelPath/"examples/train", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold);
trainer.createDataset(goldConfigs, debug, modelPath/"examples/train", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold, memcheck);
if (!computeDevScore)
{
machine.setDictsState(Dict::State::Closed);
trainer.createDataset(devGoldConfigs, debug, modelPath/"examples/dev", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold);
trainer.createDataset(devGoldConfigs, debug, modelPath/"examples/dev", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold, memcheck);
}
}
if (trainStrategy[currentEpoch].count(Trainer::TrainAction::ResetParameters) or trainStrategy[currentEpoch].count(Trainer::TrainAction::ResetOptimizer))
......@@ -392,6 +394,8 @@ int MacaonTrain::main()
std::FILE * f = std::fopen(trainInfos.c_str(), "a");
fmt::print(f, "{}\t{}\n", iterStr, devScoreMean);
std::fclose(f);
if (memcheck)
fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
}
}
......
......@@ -18,7 +18,7 @@ void Trainer::makeDevDataLoader(std::filesystem::path dir)
devDataLoader = torch::data::make_data_loader(*devDataset, torch::data::DataLoaderOptions(batchSize).workers(0).max_jobs(0));
}
void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold)
void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck)
{
std::vector<SubConfig> configs;
for (auto & goldConfig : goldConfigs)
......@@ -26,12 +26,12 @@ void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, s
machine.trainMode(false);
extractExamples(configs, debug, dir, epoch, dynamicOracle, explorationThreshold);
extractExamples(configs, debug, dir, epoch, dynamicOracle, explorationThreshold, memcheck);
machine.saveDicts();
}
void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold)
void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck)
{
torch::AutoGradMode useGrad(false);
......@@ -50,10 +50,13 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
std::atomic<int> totalNbExamples = 0;
if (memcheck)
fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
NeuralNetworkImpl::setDevice(torch::kCPU);
machine.to(NeuralNetworkImpl::getDevice());
std::for_each(std::execution::par, configs.begin(), configs.end(),
[this, maxNbExamplesPerFile, &examplesPerState, &totalNbExamples, debug, dynamicOracle, explorationThreshold, dir, epoch, &examplesMutex](SubConfig & config)
[this, maxNbExamplesPerFile, &examplesPerState, &totalNbExamples, debug, memcheck, dynamicOracle, explorationThreshold, dir, epoch, &examplesMutex](SubConfig & config)
{
config.addPredicted(machine.getPredicted());
config.setStrategy(machine.getStrategyDefinition());
......@@ -189,7 +192,11 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
if (config.needsUpdate())
config.update();
} // End while true
if (memcheck)
fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
}); // End for on configs
for (auto & it : examplesPerState)
......@@ -203,6 +210,8 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
util::myThrow(fmt::format("could not create file '{}'", currentEpochAllExtractedFile.c_str()));
std::fclose(f);
if (memcheck)
fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
fmt::print(stderr, "[{}] Extracted {} examples\n", util::getTime(), util::int2HumanStr(totalNbExamples));
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment