From a82050a970548a0baef0ae388f09ed27f220c378 Mon Sep 17 00:00:00 2001
From: Benoit Favre <benoit.favre@lis-lab.fr>
Date: Tue, 2 Jun 2020 10:36:05 +0200
Subject: [PATCH] fix bibliovid_add_abstract stats print and run.sh cleanup glob

---
 bibliovid_add_abstract.py |  5 +++--
 run.sh                    | 16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/bibliovid_add_abstract.py b/bibliovid_add_abstract.py
index d3ffc90..98e3a56 100644
--- a/bibliovid_add_abstract.py
+++ b/bibliovid_add_abstract.py
@@ -43,10 +43,11 @@ for article in articles['results']:
   if not found:
     print('NOT FOUND:', title, file=sys.stderr)
 
+print(json.dumps(articles, indent=2))
+
 print('TOTAL', len(articles['results']), file=sys.stderr)
 for key, value in stats.items():
-  print(key, value, value / len(articles['results'], file=sys.stderr))
+  print(key, value, value / len(articles['results']), file=sys.stderr)
 
-print(json.dumps(articles, indent=2))
 
 
diff --git a/run.sh b/run.sh
index 7ca228d..ee450cd 100755
--- a/run.sh
+++ b/run.sh
@@ -13,20 +13,20 @@ out="$dir/data/"`date '+%Y%m%d'`
 mkdir -p "$out"
 
 # CORD-19 metadata
-#curl https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/latest/metadata.csv > "$out/cord19-metadata_stage1.csv"
-#python "$dir/cord19_csv2json.py" "$out/cord19-metadata_stage1.csv" > "$out/cord19-metadata.json"
+curl https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/latest/metadata.csv > "$out/cord19-metadata_stage1.csv"
+python "$dir/cord19_csv2json.py" "$out/cord19-metadata_stage1.csv" > "$out/cord19-metadata.json"
 
 # litcovid
-#python "$dir/litcovid_scrapper.py" > "$out/litcovid_stage1.json"
-#python "$dir/litcovid_add_abstract.py" "$out/litcovid_stage1.json" > "$out/litcovid.json"
+python "$dir/litcovid_scrapper.py" > "$out/litcovid_stage1.json"
+python "$dir/litcovid_add_abstract.py" "$out/litcovid_stage1.json" > "$out/litcovid.json"
 
 # bibliovid
-#count=`curl 'https://bibliovid.org/api/v1/posts?format=json' | python -mjson.tool | grep '"count":' | grep -o '[0-9]*'`
-#curl "https://bibliovid.org/api/v1/posts?format=json&offset=0&limit=$count" | python -mjson.tool > "$out/bibliovid_stage1.json"
-#python "$dir/bibliovid_scrapper.py" "$out/bibliovid_stage1.json" > "$out/bibliovid_stage2.json"
+count=`curl 'https://bibliovid.org/api/v1/posts?format=json' | python -mjson.tool | grep '"count":' | grep -o '[0-9]*'`
+curl "https://bibliovid.org/api/v1/posts?format=json&offset=0&limit=$count" | python -mjson.tool > "$out/bibliovid_stage1.json"
+python "$dir/bibliovid_scrapper.py" "$out/bibliovid_stage1.json" > "$out/bibliovid_stage2.json"
 python "$dir/bibliovid_add_abstract.py" "$out/bibliovid_stage2.json" > "$out/bibliovid_stage3.json"
 python "$dir/bibliovid_normalize.py" "$out/bibliovid_stage3.json" > "$out/bibliovid.json"
 
 # cleanup
-rm "$out/*stage*" 
+rm "$out/"*_stage*
 
-- 
GitLab