# Convert CORD-19 metadata csv to json while normalizing a few fields.

import sys
import csv
import json

def normalize_entry(entry):
  """Add the derived ``url`` and ``publication_date`` fields to *entry*.

  *entry* is a dict mapping lower-cased column names to string values.
  It is modified in place and also returned.

  * ``url`` is built from ``doi`` when the entry has no ``url`` key and
    the DOI is non-empty.  An empty DOI is skipped so that no bare
    ``https://www.doi.org/`` link is fabricated.
  * ``publication_date`` is copied from ``publish_time`` when the entry
    has no ``publication_date`` key.
  """
  doi = entry.get('doi')
  if doi and 'url' not in entry:
    entry['url'] = 'https://www.doi.org/' + doi
  if 'publish_time' in entry and 'publication_date' not in entry:
    entry['publication_date'] = entry['publish_time']
  return entry


def read_entries(rows):
  """Convert CSV rows into a list of normalized dicts.

  *rows* is an iterable of lists of strings (e.g. a ``csv.reader``); the
  first item is the header row.  Header names are lower-cased to form the
  keys.  Returns ``[]`` when *rows* is empty.

  Blank rows are skipped, rows shorter than the header are padded with
  empty strings, and cells beyond the header length are ignored.
  """
  rows = iter(rows)
  headers = next(rows, None)
  if headers is None:
    # Empty input: no header row, hence no entries.
    return []
  names = [name.lower() for name in headers]

  entries = []
  for row in rows:
    if not row:
      # csv.reader yields [] for blank lines.
      continue
    # Pad short rows so every entry carries the full set of columns;
    # zip() below drops any surplus cells.
    padded = list(row) + [''] * (len(names) - len(row))
    entries.append(normalize_entry(dict(zip(names, padded))))
  return entries


def main(argv=None):
  """Read the CSV named by ``argv[1]`` and print it to stdout as JSON.

  *argv* defaults to ``sys.argv``.  Exits with a usage message when no
  input path is given.
  """
  argv = sys.argv if argv is None else argv
  if len(argv) < 2:
    sys.exit('usage: ' + argv[0] + ' METADATA_CSV')

  # newline='' lets the csv module handle line endings itself, which is
  # required for quoted fields that contain embedded newlines.
  # NOTE(review): assumes the metadata file is UTF-8 encoded -- confirm.
  with open(argv[1], newline='', encoding='utf-8') as fp:
    data = read_entries(csv.reader(fp))

  print(json.dumps(data, indent=2))


if __name__ == '__main__':
  main()