From 4a0cdafd28b4288b8f9fae9db84b0ebd8dfba8a8 Mon Sep 17 00:00:00 2001
From: Robin Krahl
Date: Thu, 8 Mar 2018 21:07:16 +0100
Subject: Add basic import implementation

---
 bibtool/cli.py     | 54 ++++++++++++++++++++++++++++++++++++++++++--
 bibtool/extract.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   |  2 ++
 3 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 bibtool/extract.py

diff --git a/bibtool/cli.py b/bibtool/cli.py
index 23ceaa4..3356d16 100644
--- a/bibtool/cli.py
+++ b/bibtool/cli.py
@@ -1,6 +1,12 @@
 # Copyright (C) 2018 Robin Krahl
 # SPDX-License-Identifier: MIT
 
+import os
+import os.path
+import shutil
+
+import bibtexparser
+import bibtool.extract
 import click
 
 
@@ -15,6 +21,50 @@ def cli():
 
 @cli.command('import')
 @click.argument('filename', type=TYPE_FILE)
-@click.option('--directory', type=TYPE_DIR)
+@click.option('--directory', type=TYPE_DIR, default=os.getcwd())
 def _import(filename, directory):
-    click.echo(filename)
+    """Import FILENAME and its BibTeX entry into the repository.
+
+    The BibTeX data is looked up from the file's metadata, may be edited
+    interactively, and is stored as <ID>.bib next to a copy of the file
+    in DIRECTORY (default: the current working directory).
+    """
+    bibtex_data = bibtool.extract.get_bibtex_data(filename)
+
+    # ClickException is reported as a plain error message instead of a
+    # traceback; it is still a subclass of Exception.
+    if not bibtex_data.entries:
+        raise click.ClickException('Did not find any Bibtex entry.')
+    if len(bibtex_data.entries) > 1:
+        raise click.ClickException('Found more than one Bibtex entries.')
+
+    entry = bibtex_data.entries[0]
+    click.echo('Found one Bibtex entry: ' + entry['ID'])
+    if click.confirm('Do you want to edit the entry?'):
+        edited_data = click.edit(bibtexparser.dumps(bibtex_data))
+        # click.edit returns None when the editor is closed without changes.
+        if edited_data:
+            bibtex_data = bibtexparser.loads(edited_data)
+            if len(bibtex_data.entries) != 1:
+                raise click.ClickException(
+                    'The edited data must contain exactly one Bibtex entry.')
+            entry = bibtex_data.entries[0]
+
+    click.confirm('Add {} to the repository?'.format(entry['ID']),
+                  default=True, abort=True)
+
+    bibfilename = os.path.join(directory, entry['ID'] + '.bib')
+    outfileext = os.path.splitext(filename)[1]
+    outfilename = os.path.join(directory, entry['ID'] + outfileext)
+
+    if os.path.exists(bibfilename) or os.path.exists(outfilename):
+        click.confirm('There is already a document with this ID in the '
+                      'repository. Continue anyway?', abort=True)
+
+    # Write with an explicit encoding so that non-ASCII entries do not depend
+    # on the platform's default encoding.
+    with open(bibfilename, 'w', encoding='utf-8') as f:
+        bibtexparser.dump(bibtex_data, f)
+    shutil.copy(filename, outfilename)
+
+    click.echo('Added {} to the repository.'.format(entry['ID']))
diff --git a/bibtool/extract.py b/bibtool/extract.py
new file mode 100644
index 0000000..6409f1b
--- /dev/null
+++ b/bibtool/extract.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2018 Robin Krahl
+# SPDX-License-Identifier: MIT
+
+import mimetypes
+import urllib.parse
+import urllib.request
+
+import bibtexparser
+import PyPDF2.pdf
+
+
+# Key of the PDF document information entry that holds the DOI.
+PDF_INFO_DOI = '/doi'
+
+
+def _get_data_for_doi(doi):
+    """Fetch the BibTeX record for *doi* and return the parsed database.
+
+    The DOI resolver is queried with an Accept header asking for a
+    BibTeX-formatted bibliography entry; the response is parsed with
+    bibtexparser.
+    """
+    # Percent-encode the DOI: it may contain characters that are not valid in
+    # a URL path. Slashes are kept as they separate prefix and suffix.
+    url = 'https://doi.org/{}'.format(urllib.parse.quote(doi))
+    headers = {'Accept': 'text/bibliography; style=bibtex'}
+    request = urllib.request.Request(url, headers=headers)
+    with urllib.request.urlopen(request) as response:
+        bibtex = response.read().decode('utf-8')
+    return bibtexparser.loads(bibtex)
+
+
+def _handle_pdf(filename):
+    """Return the BibTeX data for the PDF file *filename*.
+
+    The DOI is read from the PDF's document information and resolved via
+    _get_data_for_doi.
+
+    Raises:
+        Exception: if the PDF has no document information or no DOI entry.
+    """
+    with open(filename, 'rb') as f:
+        reader = PyPDF2.pdf.PdfFileReader(f)
+        # documentInfo is None for PDFs without an info dictionary, so it
+        # must be checked before the membership test.
+        info = reader.documentInfo
+        if not info or PDF_INFO_DOI not in info:
+            raise Exception('PDF file does not have doi header')
+        doi = info[PDF_INFO_DOI]
+
+    return _get_data_for_doi(doi)
+
+
+def get_bibtex_data(filename):
+    """Return the parsed BibTeX data for the document *filename*.
+
+    The file type is guessed from the file name; only PDF files are
+    supported.
+
+    Raises:
+        Exception: if the file type is not supported.
+    """
+    mtype, _ = mimetypes.guess_type(filename)
+    if mtype == 'application/pdf':
+        return _handle_pdf(filename)
+    raise Exception('Unsupported file type ' + str(mtype))
diff --git a/requirements.txt b/requirements.txt
index dca9a90..620d787 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
+bibtexparser
 click
+PyPDF2
-- 
cgit v1.2.1