retriever.py 3.47 KB
Newer Older
1
2
import json
import logging
from itertools import groupby

from appdirs import user_cache_dir
from diskcache import Cache

from dbxref import config

# Module-level logger shared by all functions in this file.
logger = logging.getLogger(__name__)
# NOTE(review): this forces DEBUG for this logger regardless of the
# application's logging configuration - confirm that this is intended.
logger.setLevel(logging.DEBUG)

Lukas Jelonek's avatar
Lukas Jelonek committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def retrieve(dbxrefs, ignore_cache=False):
    """Resolve dbxrefs, serving entries from the cache where possible.

    Args:
        dbxrefs: list of dicts with at least the keys ``'db'`` and ``'id'``.
            The ``'db'`` values are normalized in place.
        ignore_cache: when true, the cache lookup is skipped and every
            dbxref is loaded again; the loaded entries are still written
            to the cache.

    Returns:
        A list containing the cached entries followed by the newly loaded
        entries.
    """
    cache = init_cache()
    try:
        # normalize db notation, then sort by db so that the groupby() in
        # load_uncached_entries sees each database as one contiguous run
        normalize_db_notation(dbxrefs)
        dbxrefs = sorted(dbxrefs, key=lambda x: x['db'])

        # lookup from cache
        if ignore_cache:
            cached, uncached = [], dbxrefs
        else:
            cached, uncached = find_cached_entries(cache, dbxrefs)

        # load uncached
        loaded_uncached = load_uncached_entries(uncached)
        cache_entries(cache, loaded_uncached)
    finally:
        # close the cache even when normalization or a provider fails
        cache.close()

    # compile results
    results = []
    results.extend(cached)
    results.extend(loaded_uncached)
    return results

def normalize_db_notation(dbxrefs):
    """Rewrite each dbxref's ``'db'`` value to the provider's spelling.

    For every dbxref whose database has a configured provider, the first
    entry of the provider's ``'prefixes'`` that equals the database name
    case-insensitively replaces ``dbxref['db']``. Dbxrefs without a
    provider, or without a matching prefix, are left unchanged.

    Args:
        dbxrefs: iterable of dicts with a ``'db'`` key; modified in place.
    """
    for dbxref in dbxrefs:
        key = dbxref['db']
        if config.has_provider(key):
            provider = config.get_provider(key)
            wanted = key.lower()
            for prefix in provider['prefixes']:
                if prefix.lower() == wanted:
                    dbxref['db'] = prefix
                    # take the first matching prefix only; without the
                    # break a later match would overwrite it
                    break
        logger.debug("'%s' -> '%s'", key, dbxref['db'])
47

48
def load_with_external_provider(provider, dbxrefs):
    """Load dbxrefs by running the provider's external retriever script.

    The command line is the configured ``provider['retriever']['location']``
    followed by one ``db:id`` argument per dbxref. The script's standard
    output is decoded as UTF-8 and parsed as JSON.

    Args:
        provider: provider configuration dict with
            ``provider['retriever']['location']`` set to the command to run.
        dbxrefs: list of dicts with the keys ``'db'`` and ``'id'``.

    Returns:
        The object parsed from the script's JSON output.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
        ValueError: if the output is not valid JSON.
    """
    import shlex
    import subprocess

    logger.debug('Loading {0} via external provider'.format(dbxrefs))
    script = provider['retriever']['location']
    # The configured location is still interpreted by the shell (it may
    # carry its own arguments), but every dbxref is quoted so that shell
    # metacharacters in a db name or id cannot alter the command.
    arguments = ' '.join(shlex.quote(toString(d)) for d in dbxrefs)
    call = '{} {}'.format(script, arguments)
    logger.debug("Running '{}'".format(call))
    result = subprocess.check_output(call, shell=True)
    return json.loads(result.decode('utf-8'))

57
58
59
def load_with_internal_provider(provider, dbxrefs):
    """Load dbxrefs through a Python module named in the provider config.

    Args:
        provider: provider configuration dict;
            ``provider['retriever']['location']`` is the dotted name of a
            module that exposes a ``retrieve`` callable.
        dbxrefs: passed unchanged to the module's ``retrieve``.

    Returns:
        Whatever the module's ``retrieve(dbxrefs)`` returns.

    Raises:
        ImportError: if the module cannot be imported.
        AttributeError: if the module has no ``retrieve`` attribute.
    """
    import importlib

    module = importlib.import_module(provider['retriever']['location'])
    return module.retrieve(dbxrefs)
62

63
64
def toString(dbxref):
    """Return the ``db:id`` string form of a dbxref dict."""
    database, identifier = dbxref['db'], dbxref['id']
    return '{}:{}'.format(database, identifier)
Lukas Jelonek's avatar
Lukas Jelonek committed
65
66
67
68
69
70
71
72
73
74

def init_cache():
    """Create a Cache in the directory given by user_cache_dir('dbxref')."""
    return Cache(user_cache_dir('dbxref'))

def cache_entries(cache, entries, expiration_time=86400):
    """Store every entry in the cache under its ``'id'`` value.

    Args:
        cache: object providing ``set(key, value, expire=...)``.
        entries: iterable of dicts that each have an ``'id'`` key.
        expiration_time: value passed as ``expire`` for every entry;
            defaults to 86400 (one day in seconds).
    """
    for entry in entries:
        logger.debug('Caching %s', entry['id'])
        cache.set(entry['id'], entry, expire=expiration_time)
Lukas Jelonek's avatar
Lukas Jelonek committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

def find_cached_entries(cache, dbxrefs):
    """Split dbxrefs into cached entries and dbxrefs still to be loaded.

    Args:
        cache: mapping-like cache indexed by the ``db:id`` string of a
            dbxref; a missing key must raise ``KeyError``.
        dbxrefs: iterable of dicts with the keys ``'db'`` and ``'id'``.

    Returns:
        A tuple ``(cached, uncached)``: the values found in the cache and
        the dbxrefs for which the cache holds nothing.
    """
    cached = []
    uncached = []
    for dbxref in dbxrefs:
        key = toString(dbxref)
        # A single lookup instead of "in" followed by "[]": the entry
        # cannot disappear (e.g. expire) between the check and the read.
        try:
            entry = cache[key]
        except KeyError:
            uncached.append(dbxref)
        else:
            logger.debug("Found {} in cache".format(key))
            cached.append(entry)
    return (cached, uncached)

def load_uncached_entries(dbxrefs):
    """Load entries for dbxrefs through their configured providers.

    Consecutive dbxrefs with the same ``'db'`` are handed to the provider
    in one call, so the input should already be sorted or grouped by
    ``'db'``. Dbxrefs whose database has no provider, or whose provider
    has no ``'retriever'``, yield a ``'not supported'`` entry.

    Args:
        dbxrefs: iterable of dicts with the keys ``'db'`` and ``'id'``.

    Returns:
        A list with the entries produced for all groups, in group order.

    Raises:
        Exception: if a provider's retriever type is neither
            ``'external'`` nor ``'internal'``.
    """
    results = []
    for key, group in groupby(dbxrefs, lambda x: x['db']):
        # groupby yields one-shot iterators; materialize the group once
        group = list(group)

        if not config.has_provider(key):
            logger.debug('{0} is not supported'.format(key))
            results.extend(_not_supported_entries(group))
            continue

        provider = config.get_provider(key)
        logger.debug('{0} is supported'.format(key))
        if 'retriever' not in provider:
            logger.debug('{0} is not supported'.format(key))
            results.extend(_not_supported_entries(group))
            continue

        retriever_type = provider['retriever']['type']
        if retriever_type == 'external':
            results.extend(load_with_external_provider(provider, group))
        elif retriever_type == 'internal':
            results.extend(load_with_internal_provider(provider, group))
        else:
            raise Exception('Unknown retriever type', retriever_type)
    return results


def _not_supported_entries(dbxrefs):
    """Build one 'not supported' placeholder entry per dbxref."""
    return [{'id': toString(x), 'status': 'not supported'} for x in dbxrefs]