import json, sys
from datetime import datetime, date
import urllib.request
import xml.etree.ElementTree as ET

# Caller identification: fetch() sends these values as the `tool` and `email`
# query parameters of every efetch request it builds.
tool = "https://covid19.lis-lab.fr"
email = "benoit.favre@univ-amu.fr"

# Three-letter English month abbreviation -> zero-padded month number.
month_mapping = {
  abbreviation: '%02d' % number
  for number, abbreviation in enumerate(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
    start=1,
  )
}

def map_month(text):
  """Translate a month name into its two-digit number.

  Only the first three characters of `text` are considered, ignoring case,
  so 'Jan', 'january' and 'JANUARY' all give '01'. When those characters do
  not form a known abbreviation, `text` is returned unchanged.
  """
  abbreviation = text[:3].lower().capitalize()
  return month_mapping.get(abbreviation, text)

def make_batches(sequence, size=100):
  """Yield consecutive slices of `sequence`, each holding at most `size` items.

  Args:
    sequence: any object supporting len() and slicing (list, tuple, str...).
    size: maximum number of items per batch; must be at least 1.

  Yields:
    Slices of `sequence` in order; the last one may be shorter than `size`.
    Nothing is yielded for an empty sequence.

  Raises:
    ValueError: if `size` is smaller than 1. Raised when iteration starts,
      since this is a generator.
  """
  # A zero or negative size would never advance past the end of the sequence,
  # so reject it instead of looping forever.
  if size < 1:
    raise ValueError('size must be at least 1, got %r' % (size,))
  for start in range(0, len(sequence), size):
    yield sequence[start: start + size]

def fetch(articles):
  """Fill in the 'abstract' field of each article from PubMed, in place.

  Issues a single efetch request covering every PMID found in `articles`,
  then, for each <PubmedArticle> in the response whose PMID was requested,
  stores the text of its <Abstract>/<AbstractText> elements (joined with a
  space) under the 'abstract' key of the matching article dict(s).

  Args:
    articles: list of dicts, each with a 'pmid' key. PMIDs are compared as
      strings, so ints and strings are both accepted.

  Returns:
    None. Articles for which the response holds no record are left untouched.
    Nothing is requested when `articles` is empty.

  Raises:
    KeyError: if an article has no 'pmid' key.
    Errors from urllib.request.urlopen and ET.fromstring are not caught and
    propagate to the caller.
  """
  from urllib.parse import urlencode

  # Nothing to look up: avoid sending a request without any id.
  if not articles:
    return

  # Several articles may share a PMID; keep them all so each gets the abstract.
  by_id = {}
  for article in articles:
    by_id.setdefault(str(article['pmid']), []).append(article)

  # urlencode percent-escapes the values (the tool name is itself a URL).
  query = [('db', 'pubmed'), ('rettype', 'xml'), ('tool', tool), ('email', email)]
  query.extend(('id', pmid) for pmid in by_id)
  url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' + urlencode(query)

  # Read the whole payload first so the connection is released before parsing.
  with urllib.request.urlopen(url) as response:
    payload = response.read()

  root = ET.fromstring(payload)
  for record in root.findall('.//PubmedArticle'):
    pmid = record.findtext('.//PMID')
    if pmid not in by_id:
      continue
    # itertext() flattens inline markup nested inside an AbstractText element.
    abstract = ' '.join(
      ''.join(part.itertext())
      for part in record.findall('.//Abstract/AbstractText')
    )
    for found in by_id[pmid]:
      found['abstract'] = abstract

# Script body: load the article list given on the command line, add abstracts
# batch by batch, and write the enriched list to stdout as JSON.
if len(sys.argv) != 2:
  # stdout carries the JSON result, so the usage message goes to stderr.
  print('usage: %s <articles-json>' % sys.argv[0], file=sys.stderr)
  sys.exit(1)

# JSON text is UTF-8; do not depend on the locale's default encoding.
with open(sys.argv[1], encoding='utf-8') as fp:
  articles = json.load(fp)

# fetch() updates the article dicts in place, one request per batch.
for batch in make_batches(articles, 100):
  fetch(batch)

print(json.dumps(articles, indent=2))