diff --git a/README.md b/README.md
index b30d921c1f808f7615272aec9eb402ed9140b9ba..ffe9cf482357ece53efd90ae72e6e086ee6bb235 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,18 @@ Data
 ----
 
 Input data is expected to be a json-formatted file containing a list of articles. Each article
-should have a title, an abstract and a topics field containing a list of topics.
-
+should have a `title`, an `abstract` and a `topics` field containing a list of topics; any other fields are ignored. For example:
+```
+[
+  {
+    "title": "this is a title",
+    "abstract": "this is an abstract",
+    "topics": ["topic1", "topic2", "topic3"],
+    ...
+  },
+  ...
+]
+```
 
 Installing
 ----------
@@ -22,6 +32,8 @@ source env/bin/activate
 pip install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html
 ```
 
+If newer package releases break the installation, install the pinned versions from `requirements-freeze.txt` instead of `requirements.txt` (keeping the same `-f` option).
+Note that we use PyTorch with CUDA 10.1; you may edit `requirements.txt` to use a different build depending on your setup (see https://pytorch.org/get-started/locally/).
 
 Training
 --------
diff --git a/requirements-freeze.txt b/requirements-freeze.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a6bb22282a13676631188d3e44240d3e9229658
--- /dev/null
+++ b/requirements-freeze.txt
@@ -0,0 +1,37 @@
+absl-py==0.9.0
+cachetools==4.1.0
+certifi==2020.4.5.1
+chardet==3.0.4
+click==7.1.2
+filelock==3.0.12
+future==0.18.2
+google-auth==1.16.0
+google-auth-oauthlib==0.4.1
+grpcio==1.29.0
+idna==2.9
+importlib-metadata==1.6.0
+joblib==0.15.1
+Markdown==3.2.2
+numpy==1.18.4
+oauthlib==3.1.0
+protobuf==3.12.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pytorch-lightning==0.7.6
+PyYAML==5.3.1
+regex==2020.5.14
+requests==2.23.0
+requests-oauthlib==1.3.0
+rsa==4.0
+sacremoses==0.0.43
+sentencepiece==0.1.91
+six==1.15.0
+tensorboard==2.2.2
+tensorboard-plugin-wit==1.6.0.post3
+tokenizers==0.7.0
+torch==1.5.0+cu101
+tqdm==4.46.0
+transformers==2.10.0
+urllib3==1.25.9
+Werkzeug==1.0.1
+zipp==3.1.0