NOTE(review): reconstructed from a copy of this patch whose line breaks and
runs of whitespace had been collapsed. Indentation and blank context lines
were re-derived from the surviving text, and the hunk ranges were recomputed
to match the lines shown here. The collapsed copy recorded the ranges as
"-43,10 +43,11" (bibliovid_add_abstract.py) and "-13,20 +13,20" (run.sh), so
some blank context lines are probably still missing. Verify against the blobs
named on the index lines before applying.

diff --git a/bibliovid_add_abstract.py b/bibliovid_add_abstract.py
index d3ffc90cff7a0f57ac1d5bfde84d6ffdc6919cc5..98e3a5692434e91876e9477e2bec45914f1aff29 100644
--- a/bibliovid_add_abstract.py
+++ b/bibliovid_add_abstract.py
@@ -43,8 +43,9 @@ for article in articles['results']:
     if not found:
         print('NOT FOUND:', title, file=sys.stderr)
 
+print(json.dumps(articles, indent=2))
+
 print('TOTAL', len(articles['results']), file=sys.stderr)
 for key, value in stats.items():
-    print(key, value, value / len(articles['results'], file=sys.stderr))
+    print(key, value, value / len(articles['results']), file=sys.stderr)
 
-print(json.dumps(articles, indent=2))
diff --git a/run.sh b/run.sh
index 7ca228dc38e00cb3cad4970873265aeee0bfd8c7..ee450cdda964cbf3834f3399e7ee396fdb3229b4 100755
--- a/run.sh
+++ b/run.sh
@@ -13,19 +13,19 @@ out="$dir/data/"`date '+%Y%m%d'`
 mkdir -p "$out"
 
 # CORD-19 metadata
-#curl https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/latest/metadata.csv > "$out/cord19-metadata_stage1.csv"
-#python "$dir/cord19_csv2json.py" "$out/cord19-metadata_stage1.csv" > "$out/cord19-metadata.json"
+curl https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/latest/metadata.csv > "$out/cord19-metadata_stage1.csv"
+python "$dir/cord19_csv2json.py" "$out/cord19-metadata_stage1.csv" > "$out/cord19-metadata.json"
 
 # litcovid
-#python "$dir/litcovid_scrapper.py" > "$out/litcovid_stage1.json"
-#python "$dir/litcovid_add_abstract.py" "$out/litcovid_stage1.json" > "$out/litcovid.json"
+python "$dir/litcovid_scrapper.py" > "$out/litcovid_stage1.json"
+python "$dir/litcovid_add_abstract.py" "$out/litcovid_stage1.json" > "$out/litcovid.json"
 
 # bibliovid
-#count=`curl 'https://bibliovid.org/api/v1/posts?format=json' | python -mjson.tool | grep '"count":' | grep -o '[0-9]*'`
-#curl "https://bibliovid.org/api/v1/posts?format=json&offset=0&limit=$count" | python -mjson.tool > "$out/bibliovid_stage1.json"
-#python "$dir/bibliovid_scrapper.py" "$out/bibliovid_stage1.json" > "$out/bibliovid_stage2.json"
+count=`curl 'https://bibliovid.org/api/v1/posts?format=json' | python -mjson.tool | grep '"count":' | grep -o '[0-9]*'`
+curl "https://bibliovid.org/api/v1/posts?format=json&offset=0&limit=$count" | python -mjson.tool > "$out/bibliovid_stage1.json"
+python "$dir/bibliovid_scrapper.py" "$out/bibliovid_stage1.json" > "$out/bibliovid_stage2.json"
 python "$dir/bibliovid_add_abstract.py" "$out/bibliovid_stage2.json" > "$out/bibliovid_stage3.json"
 python "$dir/bibliovid_normalize.py" "$out/bibliovid_stage3.json" > "$out/bibliovid.json"
 
 # cleanup
-rm "$out/*stage*"
+rm "$out/"*_stage*