retriever.py 1.76 KB
Newer Older
1
2
3
import logging
logger = logging.getLogger(__name__)

4
from dbxref import config
5
6
7
from itertools import groupby
import json

8
def retrieve(dbxrefs):
    """Retrieve information for every dbxref through its configured provider.

    The dbxrefs are sorted and grouped by their ``'db'`` value. Each group is
    handed to the provider that ``config`` reports for that database, in a
    single call per database.

    Args:
        dbxrefs: iterable of dicts, each with at least the keys ``'db'`` and
            ``'id'``.

    Returns:
        A list containing whatever the providers returned, plus one
        ``{'id': 'DB:ID', 'status': 'not supported'}`` entry for every dbxref
        whose database has no provider. Entries are ordered by database
        name, not by input position.

    Raises:
        Exception: if a provider declares a retriever type other than
            ``'external'`` or ``'internal'``.
    """
    # groupby() only merges *adjacent* items, so the input has to be sorted
    # first -- and sorted() returns a new list, it does not sort in place.
    # Sorting case-insensitively keeps differently cased notations of a
    # database next to each other; the exact spelling breaks ties so every
    # spelling forms exactly one contiguous group.
    ordered = sorted(dbxrefs, key=lambda x: (x['db'].lower(), x['db']))

    results = []
    # NOTE(review): the key handed to config is the exact spelling found in
    # the input; whether config treats it case-insensitively is not visible
    # from this function.
    for key, group in groupby(ordered, key=lambda x: x['db']):
        # Materialize once: a groupby group is a one-shot iterator.
        entries = list(group)

        if not config.has_provider(key):
            logger.debug('{0} is not supported'.format(key))
            results.extend(
                {'id': toString(x), 'status': 'not supported'} for x in entries
            )
            continue

        provider = config.get_provider(key)
        logger.debug('{0} is supported'.format(key))
        retriever_type = provider['retriever']['type']
        if retriever_type == 'external':
            results.extend(load_with_external_provider(provider, entries))
        elif retriever_type == 'internal':
            results.extend(load_with_internal_provider(provider, entries))
        else:
            raise Exception('Unknown retriever type', retriever_type)
    return results
25

26
def load_with_external_provider(provider, dbxrefs):
    """Run a provider's external script and return its parsed JSON output.

    The command line is the provider's ``['retriever']['location']`` string
    followed by one ``DB:ID`` argument per dbxref. It is executed through
    the shell, and its standard output is decoded as UTF-8 and parsed as JSON.

    Args:
        provider: dict with the command under ``['retriever']['location']``.
        dbxrefs: list of dicts with the keys ``'db'`` and ``'id'``.

    Returns:
        The object obtained by JSON-decoding the script's standard output.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
        ValueError: if the output is not valid JSON.
    """
    import shlex
    import subprocess

    logger.debug('Loading {0} via external provider'.format(dbxrefs))
    script = provider['retriever']['location']
    # The dbxref values end up on a shell command line, so each argument is
    # quoted to keep shell metacharacters in an id from being interpreted.
    # Plain 'DB:ID' tokens contain only safe characters and stay unchanged.
    arguments = ' '.join(shlex.quote(toString(x)) for x in dbxrefs)
    call = '{} {}'.format(script, arguments)
    logger.debug("Running '{}'".format(call))
    # shell=True is kept so the configured location string is interpreted
    # by the shell exactly as before.
    result = subprocess.check_output(call, shell=True)
    return json.loads(result.decode('utf-8'))

35
36
37
def load_with_internal_provider(provider, dbxrefs):
    """Delegate retrieval to the ``retrieve`` function of a provider module.

    The module named by ``provider['retriever']['location']`` is imported
    and its ``retrieve`` attribute is called with ``dbxrefs``.

    Args:
        provider: dict with the module name under
            ``['retriever']['location']``.
        dbxrefs: value passed unchanged to the module's ``retrieve``.

    Returns:
        Whatever the module's ``retrieve`` returns.
    """
    from importlib import import_module

    module_name = provider['retriever']['location']
    provider_module = import_module(module_name)
    return provider_module.retrieve(dbxrefs)
40

41
42
def toString(dbxref):
    """Render a dbxref dict as the string ``'<db>:<id>'``.

    Args:
        dbxref: mapping with the keys ``'db'`` and ``'id'``.

    Returns:
        The two values joined by a colon.
    """
    database = dbxref['db']
    identifier = dbxref['id']
    return '{}:{}'.format(database, identifier)