diff --git a/README.md b/README.md index b30d921c1f808f7615272aec9eb402ed9140b9ba..ffe9cf482357ece53efd90ae72e6e086ee6bb235 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,18 @@ Data ---- Input data is expected to be a json-formatted file containing a list of articles. Each article -should have a title, an abstract and a topics field containing a list of topics. - +should have a title, an abstract and a topics field containing a list of topics. Other fields are ignored. +``` +[ + { + "title": "this is a title", + "abstract": "this is an abstract", + "topics": ["topic1", "topic2", "topic3"] + ... + }, + ... +] +``` Installing ---------- @@ -22,6 +32,8 @@ source env/bin/activate pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html ``` +If newer package releases have broken the dependencies, you may install from requirements-freeze.txt (exact pinned versions) instead of requirements.txt. +Note that we use PyTorch with CUDA 10.1; you may change requirements.txt to use a different version depending on your setup (see https://pytorch.org/get-started/locally/). Training -------- diff --git a/requirements-freeze.txt b/requirements-freeze.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a6bb22282a13676631188d3e44240d3e9229658 --- /dev/null +++ b/requirements-freeze.txt @@ -0,0 +1,37 @@ +absl-py==0.9.0 +cachetools==4.1.0 +certifi==2020.4.5.1 +chardet==3.0.4 +click==7.1.2 +filelock==3.0.12 +future==0.18.2 +google-auth==1.16.0 +google-auth-oauthlib==0.4.1 +grpcio==1.29.0 +idna==2.9 +importlib-metadata==1.6.0 +joblib==0.15.1 +Markdown==3.2.2 +numpy==1.18.4 +oauthlib==3.1.0 +protobuf==3.12.2 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pytorch-lightning==0.7.6 +PyYAML==5.3.1 +regex==2020.5.14 +requests==2.23.0 +requests-oauthlib==1.3.0 +rsa==4.0 +sacremoses==0.0.43 +sentencepiece==0.1.91 +six==1.15.0 +tensorboard==2.2.2 +tensorboard-plugin-wit==1.6.0.post3 +tokenizers==0.7.0 +torch==1.5.0+cu101 +tqdm==4.46.0 +transformers==2.10.0 +urllib3==1.25.9 +Werkzeug==1.0.1 +zipp==3.1.0