# Copyright (C) 2018 Robin Krahl
# SPDX-License-Identifier: MIT
#
# Provenance: bibtool/extract.py as created by commit
# 4a0cdafd28b4288b8f9fae9db84b0ebd8dfba8a8 ("Add basic import
# implementation", Robin Krahl, Thu, 8 Mar 2018 21:07:16 +0100),
# taken from a cgit v1.2.1 patch view.

"""Look up BibTeX data for a document via the DOI stored in its metadata."""

import mimetypes
import urllib.parse
import urllib.request

import bibtexparser
import PyPDF2.pdf


# Key of the PDF document-information entry that holds the DOI.
PDF_INFO_DOI = '/doi'


def _get_data_for_doi(doi):
    """Fetch the BibTeX record for *doi* and return it parsed.

    The DOI resolver is queried with an ``Accept`` header asking for a
    BibTeX-formatted bibliography entry; the response body is decoded as
    UTF-8 and handed to ``bibtexparser.loads``.

    Raises:
        ValueError: if *doi* is empty after stripping whitespace.
        urllib.error.URLError: if the request fails.
    """
    doi = str(doi).strip()
    if not doi:
        raise ValueError('Empty DOI')

    # DOIs may contain characters such as '#', '?', '<', '>' or spaces that
    # would otherwise truncate or invalidate the URL; '/' stays unescaped.
    url = 'https://doi.org/{}'.format(urllib.parse.quote(doi))
    headers = {'Accept': 'text/bibliography; style=bibtex'}
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        bibtex = response.read().decode('utf-8')
    return bibtexparser.loads(bibtex)


def _handle_pdf(filename):
    """Read the DOI from the PDF at *filename* and return its BibTeX data.

    Raises:
        ValueError: if the PDF has no document information or no
            ``PDF_INFO_DOI`` entry in it.
    """
    with open(filename, 'rb') as f:
        info = PyPDF2.pdf.PdfFileReader(f).documentInfo
        # documentInfo is None when the PDF has no info dictionary at all,
        # so guard before the membership test.
        if info is None or PDF_INFO_DOI not in info:
            raise ValueError('PDF file does not have doi header')
        # Read the value while the file is still open: the reader may
        # resolve the entry lazily from the underlying stream.
        doi = info[PDF_INFO_DOI]

    return _get_data_for_doi(doi)


def get_bibtex_data(filename):
    """Return the parsed BibTeX data for the document at *filename*.

    The file type is guessed from the file name; only PDF files are
    supported.

    Raises:
        ValueError: if the guessed file type is not ``application/pdf``
            (including when no type could be guessed).
    """
    mtype, _ = mimetypes.guess_type(filename)
    if mtype == 'application/pdf':
        return _handle_pdf(filename)
    raise ValueError('Unsupported file type ' + str(mtype))