diff --git a/README.md b/README.md index fa1726f5e80de1e5a8ca7b4f32c9b02f8b0f92a1..7af4d0204848cc0dce02e8c9d27ed3bf80ae77c8 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,16 @@ Designed to be run at most once a day. ``` ./run.sh ``` + +Sources +------- + +* litcovid: NIH-curated list of COVID-19 articles (https://www.ncbi.nlm.nih.gov/research/coronavirus/) +Labels (8): General Information, Mechanism, Transmission, Diagnosis, Treatment, Prevention, Case Report, Epidemic Forecasting +Note that topic labels are semi-automatically assigned. See https://www.ncbi.nlm.nih.gov/research/coronavirus/faq for details + +* bibliovid: Paper categories and fine-grained analysis by experts (https://bibliovid.org/) +Labels (7): Autres, Diagnostique, Thérapeutique, Épidémiologique, Pronostique, Recommandations, Modélisation +Labels (19): Hépato-gastro-entérologie, Neurologie, Cardiologie et maladies métaboliques, Hématologie, Gériatrie, Infectiologie, Gynécologie Obstétrique, Dermatologie, Pédiatrie, Pneumologie, Transversale, Psychiatrie, Virologie, Anesthésie-Réanimation, Radiologie, Hygiène, Néphrologie, Confinement/Déconfinement, Immunité + +* CORD-19 metadata: large set of paper metadata selected with broad queries on general coronavirus research diff --git a/bibliovid_add_abstract.py b/bibliovid_add_abstract.py index 98e3a5692434e91876e9477e2bec45914f1aff29..df59b606bc28aad703e0c5d6ab67908501552386 100644 --- a/bibliovid_add_abstract.py +++ b/bibliovid_add_abstract.py @@ -8,6 +8,10 @@ from datetime import datetime, date pubmed = PubMed(tool="https://covid19.lis-lab.fr", email="benoit.favre@univ-amu.fr") +if len(sys.argv) != 3: + print('usage: %s <input> <output>' % sys.argv[0], file=sys.stderr) + sys.exit(1) + with open(sys.argv[1]) as fp: articles = json.loads(fp.read()) @@ -43,7 +47,8 @@ for article in articles['results']: if not found: print('NOT FOUND:', title, file=sys.stderr) -print(json.dumps(articles, indent=2)) +with open(sys.argv[2], 'w') as fp: + 
fp.write(json.dumps(articles, indent=2)) print('TOTAL', len(articles['results']), file=sys.stderr) for key, value in stats.items(): diff --git a/run.sh b/run.sh index ee450cdda964cbf3834f3399e7ee396fdb3229b4..a64ca9a90e0b1bcaeb462a8f4e23282da915c25b 100755 --- a/run.sh +++ b/run.sh @@ -24,7 +24,7 @@ python "$dir/litcovid_add_abstract.py" "$out/litcovid_stage1.json" > "$out/litco count=`curl 'https://bibliovid.org/api/v1/posts?format=json' | python -mjson.tool | grep '"count":' | grep -o '[0-9]*'` curl "https://bibliovid.org/api/v1/posts?format=json&offset=0&limit=$count" | python -mjson.tool > "$out/bibliovid_stage1.json" python "$dir/bibliovid_scrapper.py" "$out/bibliovid_stage1.json" > "$out/bibliovid_stage2.json" -python "$dir/bibliovid_add_abstract.py" "$out/bibliovid_stage2.json" > "$out/bibliovid_stage3.json" +python "$dir/bibliovid_add_abstract.py" "$out/bibliovid_stage2.json" "$out/bibliovid_stage3.json" python "$dir/bibliovid_normalize.py" "$out/bibliovid_stage3.json" > "$out/bibliovid.json" # cleanup