From d00a9d23e5b472bea90c8c1d3f633f53ce07dca3 Mon Sep 17 00:00:00 2001 From: Benoit Favre <benoit.favre@lis-lab.fr> Date: Wed, 26 Aug 2020 10:40:22 +0200 Subject: [PATCH] update bibliovid scrapper fields --- bibliovid_scrapper.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/bibliovid_scrapper.py b/bibliovid_scrapper.py index 982d283..dff4ac8 100644 --- a/bibliovid_scrapper.py +++ b/bibliovid_scrapper.py @@ -24,19 +24,34 @@ for article in articles['results']: return node.get_text().strip() return '' + #title = safe_text(main.find('h1')) + #link = divs[8].find('a').attrs['href'] + #findings = safe_text(divs[12].find('div')) + #take_away = safe_text(divs[14].contents[0].find('div')) + #relevance_level = safe_text(divs[16].contents[0].find('div')) + #objectives = safe_text(divs[18].contents[0].find('div')) + #methods = safe_text(divs[20].contents[0].find('div')) + + #article['link'] = link + #article['findings'] = findings + #article['take_away'] = take_away + #article['relevance_level'] = relevance_level + #article['objectives'] = objectives + #article['methods'] = methods + title = safe_text(main.find('h1')) link = divs[8].find('a').attrs['href'] - findings = safe_text(divs[12].find('div')) - take_away = safe_text(divs[14].contents[0].find('div')) - relevance_level = safe_text(divs[16].contents[0].find('div')) - objectives = safe_text(divs[18].contents[0].find('div')) - methods = safe_text(divs[20].contents[0].find('div')) + findings = safe_text(divs[14].find('div')) + take_away = safe_text(divs[16].contents[0].find('div')) + relevance_level = safe_text(divs[18].contents[0].find('div')) + objectives = safe_text(divs[20].contents[0].find('div')) + methods = safe_text(divs[22].contents[0].find('div')) article['link'] = link - article['findings'] = findings - article['take_away'] = take_away - article['relevance_level'] = relevance_level - article['objectives'] = objectives + article['results'] = findings + article['synthesis'] = take_away + article['strength_of_evidence_details'] = relevance_level + article['goals'] = objectives article['methods'] = methods print(json.dumps(articles, indent=2)) -- GitLab