Commit 3c3acb33 authored by Franck Dary

Added option memcheck to train

parent e496576e
@@ -53,6 +53,7 @@ bool isUrl(const std::string & s);
bool isNumber(const std::string & s);
std::string getTime();
+std::string getMemUsage();
long float2long(float f);
float long2float(long l);
@@ -2,6 +2,9 @@
#include "utf8.hpp"
#include <ctime>
#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <unistd.h>
#include "upper2lower"
float util::long2float(long l)
@@ -236,6 +239,24 @@ std::string util::getTime()
return std::string(buffer);
}
+std::string util::getMemUsage()
+{
+float vm_usage = 0.0;
+float resident_set = 0.0;
+unsigned long vsize;
+long rss;
+std::string ignore;
+std::ifstream ifs("/proc/self/stat", std::ios_base::in);
+ifs >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> ignore >> vsize >> rss;
+long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
+vm_usage = vsize / 1024.0;
+resident_set = rss * page_size_kb;
+return fmt::format("Virtual:{:.2f}Go Physical:{:.2f}Go", vm_usage/1000000.0, resident_set/1000000.0);
+}
bool util::choiceWithProbability(float probability)
{
int maxVal = 100000;
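For context on the new util::getMemUsage(): /proc/self/stat is a single whitespace-separated line, so the code skips the first 22 fields and reads vsize (virtual memory size in bytes, field 23) and rss (resident set size in pages, field 24), as documented in proc(5). A minimal standalone sketch of the same technique (Linux only; hypothetical example, not part of the commit):

// Standalone sketch (not part of the commit): report current process memory
// by parsing /proc/self/stat, the same technique as util::getMemUsage().
#include <fstream>
#include <string>
#include <cstdio>
#include <unistd.h>

int main()
{
  std::ifstream ifs("/proc/self/stat");
  std::string ignore;
  unsigned long vsize = 0; // field 23: virtual memory size, in bytes
  long rss = 0;            // field 24: resident set size, in pages

  for (int i = 0; i < 22; ++i) // skip fields 1-22 (pid, comm, state, ...)
    ifs >> ignore;
  ifs >> vsize >> rss;

  long pageSizeKb = sysconf(_SC_PAGE_SIZE) / 1024;
  std::printf("Virtual: %.2f MB, Physical: %.2f MB\n",
              vsize / 1024.0 / 1024.0, rss * pageSizeKb / 1024.0);
  return 0;
}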
@@ -55,13 +55,13 @@ class Trainer
private :
-void extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold);
+void extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck);
float processDataset(DataLoader & loader, bool train, bool printAdvancement, int nbExamples);
public :
Trainer(ReadingMachine & machine, int batchSize);
-void createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold);
+void createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck);
void extractActionSequence(BaseConfig & config);
void makeDataLoader(std::filesystem::path dir);
void makeDevDataLoader(std::filesystem::path dir);
@@ -22,6 +22,7 @@ po::options_description MacaonTrain::getOptionsDescription()
opt.add_options()
("debug,d", "Print debuging infos on stderr")
("silent", "Don't print speed and progress")
("memcheck", "Regularly print memory usage on stderr")
("devScore", "Compute score on dev instead of loss (slower)")
("mcd", po::value<std::string>()->default_value("ID,FORM,LEMMA,UPOS,XPOS,FEATS,HEAD,DEPREL"),
"Comma separated column names that describes the input/output format")
@@ -133,6 +134,7 @@ int MacaonTrain::main()
auto nbEpoch = variables["nbEpochs"].as<int>();
auto batchSize = variables["batchSize"].as<int>();
bool debug = variables.count("debug") == 0 ? false : true;
+bool memcheck = variables.count("memcheck") == 0 ? false : true;
bool printAdvancement = !debug && variables.count("silent") == 0 ? true : false;
bool computeDevScore = variables.count("devScore") == 0 ? false : true;
auto machineContent = variables["machine"].as<std::string>();
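For readers unfamiliar with boost::program_options, the two hunks above declare the presence-only --memcheck flag and then test for it with count(). A minimal standalone sketch of the same pattern (hypothetical example, not part of the commit; the diff uses the equivalent `count(...) == 0 ? false : true` form):

// Standalone sketch (not part of the commit): declaring and reading a
// presence-only flag with boost::program_options, as done for --memcheck.
#include <boost/program_options.hpp>
#include <cstdio>

namespace po = boost::program_options;

int main(int argc, char * argv[])
{
  po::options_description opt("Options");
  opt.add_options()("memcheck", "Regularly print memory usage on stderr");

  po::variables_map variables;
  po::store(po::parse_command_line(argc, argv, opt), variables);
  po::notify(variables);

  bool memcheck = variables.count("memcheck") > 0; // equivalent to the ternary used in the diff
  std::printf("memcheck = %s\n", memcheck ? "true" : "false");
  return 0;
}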
@@ -267,11 +269,11 @@ int MacaonTrain::main()
if (trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractGold) or trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic))
{
machine.setDictsState(trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic) ? Dict::State::Closed : Dict::State::Open);
-trainer.createDataset(goldConfigs, debug, modelPath/"examples/train", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold);
+trainer.createDataset(goldConfigs, debug, modelPath/"examples/train", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold, memcheck);
if (!computeDevScore)
{
machine.setDictsState(Dict::State::Closed);
-trainer.createDataset(devGoldConfigs, debug, modelPath/"examples/dev", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold);
+trainer.createDataset(devGoldConfigs, debug, modelPath/"examples/dev", currentEpoch, trainStrategy[currentEpoch].count(Trainer::TrainAction::ExtractDynamic), explorationThreshold, memcheck);
}
}
if (trainStrategy[currentEpoch].count(Trainer::TrainAction::ResetParameters) or trainStrategy[currentEpoch].count(Trainer::TrainAction::ResetOptimizer))
@@ -392,6 +394,8 @@ int MacaonTrain::main()
std::FILE * f = std::fopen(trainInfos.c_str(), "a");
fmt::print(f, "{}\t{}\n", iterStr, devScoreMean);
std::fclose(f);
+if (memcheck)
+  fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
}
}
@@ -18,7 +18,7 @@ void Trainer::makeDevDataLoader(std::filesystem::path dir)
devDataLoader = torch::data::make_data_loader(*devDataset, torch::data::DataLoaderOptions(batchSize).workers(0).max_jobs(0));
}
-void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold)
+void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck)
{
std::vector<SubConfig> configs;
for (auto & goldConfig : goldConfigs)
@@ -26,12 +26,12 @@ void Trainer::createDataset(std::vector<BaseConfig> & goldConfigs, bool debug, s
machine.trainMode(false);
-extractExamples(configs, debug, dir, epoch, dynamicOracle, explorationThreshold);
+extractExamples(configs, debug, dir, epoch, dynamicOracle, explorationThreshold, memcheck);
machine.saveDicts();
}
-void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold)
+void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std::filesystem::path dir, int epoch, bool dynamicOracle, float explorationThreshold, bool memcheck)
{
torch::AutoGradMode useGrad(false);
@@ -50,10 +50,13 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
std::atomic<int> totalNbExamples = 0;
+if (memcheck)
+  fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
NeuralNetworkImpl::setDevice(torch::kCPU);
machine.to(NeuralNetworkImpl::getDevice());
std::for_each(std::execution::par, configs.begin(), configs.end(),
-[this, maxNbExamplesPerFile, &examplesPerState, &totalNbExamples, debug, dynamicOracle, explorationThreshold, dir, epoch, &examplesMutex](SubConfig & config)
+[this, maxNbExamplesPerFile, &examplesPerState, &totalNbExamples, debug, memcheck, dynamicOracle, explorationThreshold, dir, epoch, &examplesMutex](SubConfig & config)
{
config.addPredicted(machine.getPredicted());
config.setStrategy(machine.getStrategyDefinition());
@@ -189,7 +192,11 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
if (config.needsUpdate())
config.update();
} // End while true
+if (memcheck)
+  fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
}); // End for on configs
for (auto & it : examplesPerState)
@@ -203,6 +210,8 @@ void Trainer::extractExamples(std::vector<SubConfig> & configs, bool debug, std:
util::myThrow(fmt::format("could not create file '{}'", currentEpochAllExtractedFile.c_str()));
std::fclose(f);
+if (memcheck)
+  fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
fmt::print(stderr, "[{}] Extracted {} examples\n", util::getTime(), util::int2HumanStr(totalNbExamples));
}
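The same timestamped two-line print appears at several points in this commit (before and after the parallel extraction in Trainer::extractExamples, at the end of each config, and after each epoch in MacaonTrain::main). A hypothetical helper that could factor it out, assuming the util::getTime()/util::getMemUsage() functions and the fmt usage shown above (not part of the commit; the header name is an assumption):

// Hypothetical helper (not in this commit): print a timestamped memory-usage
// line on stderr when the --memcheck option is enabled.
#include <cstdio>
#include <fmt/core.h>
#include "util.hpp" // assumed header declaring util::getTime() and util::getMemUsage()

void printMemUsageIfRequested(bool memcheck)
{
  if (memcheck)
    fmt::print(stderr, "[{}] Memory : {}\n", util::getTime(), util::getMemUsage());
}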