# Copyright (C) 2018 Robin Krahl
# SPDX-License-Identifier: MIT

"""Fetch BibTeX data for a document using the DOI stored in its metadata."""

import mimetypes
import urllib.parse
import urllib.request

import bibtexparser
import PyPDF2.pdf

# Key of the PDF document-information entry that holds the DOI.
PDF_INFO_DOI = '/doi'

# Seconds to wait for the DOI resolver before giving up; without a timeout
# urlopen() may block forever on an unresponsive server.
_REQUEST_TIMEOUT = 30


def _get_data_for_doi(doi):
    """Request the BibTeX entry for *doi* from the DOI resolver and parse it.

    The resolver is asked for a BibTeX-formatted bibliography entry through
    the ``Accept`` request header.

    Args:
        doi: The DOI to look up, e.g. ``10.1000/182``.

    Returns:
        The object returned by ``bibtexparser.loads`` for the response body.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    # Percent-encode everything except the slashes so that characters such as
    # '#', '<', '>' or spaces inside a DOI cannot break the request URL.
    quoted_doi = urllib.parse.quote(str(doi).strip(), safe='/')
    url = 'https://doi.org/{}'.format(quoted_doi)
    headers = {'Accept': 'text/bibliography; style=bibtex'}
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        # Honour the charset announced by the server; fall back to UTF-8.
        charset = response.headers.get_content_charset('utf-8')
        bibtex = response.read().decode(charset)
    return bibtexparser.loads(bibtex)


def _handle_pdf(filename):
    """Read the DOI from the metadata of a PDF file and fetch its BibTeX data.

    Args:
        filename: Path of the PDF file to inspect.

    Returns:
        The result of ``_get_data_for_doi`` for the DOI found in the file.

    Raises:
        ValueError: If the PDF has no document information, no ``/doi``
            entry, or a blank ``/doi`` entry.
        OSError: If the file cannot be opened.
    """
    with open(filename, 'rb') as f:
        reader = PyPDF2.pdf.PdfFileReader(f)
        # documentInfo is None when the PDF has no info dictionary at all, so
        # it must be checked before the membership test.  It is read once and
        # while the file is still open.
        info = reader.documentInfo
        if info is None or PDF_INFO_DOI not in info:
            raise ValueError('PDF file does not have doi header')
        doi = str(info[PDF_INFO_DOI]).strip()
    if not doi:
        # A blank entry is as useless as a missing one.
        raise ValueError('PDF file does not have doi header')
    # The file is closed before the (potentially slow) network lookup starts.
    return _get_data_for_doi(doi)


def get_bibtex_data(filename):
    """Return the BibTeX data for the document stored in *filename*.

    The file type is guessed from the file name via ``mimetypes.guess_type``;
    only PDF files are supported.

    Args:
        filename: Path of the document.

    Returns:
        The result of ``_handle_pdf`` for PDF files.

    Raises:
        ValueError: If the guessed file type is not ``application/pdf`` or
            the PDF does not carry a DOI.
    """
    mtype, _ = mimetypes.guess_type(filename)
    if mtype == 'application/pdf':
        return _handle_pdf(filename)
    raise ValueError('Unsupported file type ' + str(mtype))