Commit 730fd9f0 authored by Lukas Jelonek
Browse files

Add cache for entries

parent cdd8d6fe
......@@ -26,6 +26,7 @@ def main():
retrieve_parser = subparsers.add_parser('retrieve')
retrieve_parser.set_defaults(func=retrieve)
retrieve_parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
retrieve_parser.add_argument('--ignore_cache', '-C', action='store_true', default=False, help="Ignore entries from cache. Fetched entries are still stored in cache.")
retrieve_parser.add_argument('--verbose', '-v', action='store_true', default=False, help="Show debug output")
args = parser.parse_args()
......@@ -53,7 +54,14 @@ def resolve(args, config):
def retrieve(args, config):
    """Retrieve the entries for the dbxrefs given on the command line and print them as JSON.

    args   -- parsed command line arguments; reads ``args.dbxrefs`` and
              ``args.ignore_cache``
    config -- accepted for a uniform sub-command signature; not used here
    """
    # imported lazily so the retriever is only loaded for this sub-command
    from dbxref import retriever

    dbxrefs = resolver.convert_to_dbxrefs(args.dbxrefs)
    # retrieve and print exactly once, honouring the --ignore_cache switch
    entries = retriever.retrieve(dbxrefs, ignore_cache=args.ignore_cache)
    print(json.dumps(entries))
# Invoke main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
from dbxref import config
from itertools import groupby
from diskcache import Cache
from appdirs import user_cache_dir
import json
def retrieve(dbxrefs, ignore_cache=False):
    """Retrieve the entries for the given dbxrefs, using the disk cache where possible.

    dbxrefs      -- list of dicts with at least the keys 'db' and 'id'; the
                    'db' values are normalized in place
    ignore_cache -- if True, cached entries are not consulted and every dbxref
                    is loaded again; freshly loaded entries are still written
                    to the cache

    Returns a list with the cached entries first, followed by the newly loaded
    ones.
    """
    # normalize db notation so that differently cased notations are treated alike
    normalize_db_notation(dbxrefs)
    # the loader groups consecutive dbxrefs by 'db', so they have to be sorted by it
    dbxrefs = sorted(dbxrefs, key=lambda x: x['db'])

    # the cache is used as a context manager so that it is closed afterwards
    with init_cache() as cache:
        # lookup from cache
        if ignore_cache:
            cached, uncached = [], dbxrefs
        else:
            cached, uncached = find_cached_entries(cache, dbxrefs)

        # load uncached and remember them for subsequent calls
        loaded_uncached = load_uncached_entries(uncached)
        cache_entries(cache, loaded_uncached)

    # compile results
    results = list(cached)
    results.extend(loaded_uncached)
    return results
def normalize_db_notation(dbxrefs):
    """Rewrite the 'db' value of each dbxref to the spelling used by its provider.

    For every dbxref whose 'db' is known to the configuration, the provider's
    prefixes are compared case-insensitively with the 'db' value and the first
    matching prefix replaces it. Dbxrefs without a provider or without a
    matching prefix are left untouched.

    dbxrefs -- iterable of dicts with a 'db' key; modified in place
    """
    for dbxref in dbxrefs:
        key = dbxref['db']
        if not config.has_provider(key):
            continue
        provider = config.get_provider(key)
        wanted = key.lower()
        for prefix in provider['prefixes']:
            if wanted == prefix.lower():
                dbxref['db'] = prefix
                logger.debug("'{}' -> '{}'".format(key, dbxref['db']))
                # take the first prefix that matches the db, not the last one
                break
def load_with_external_provider(provider, dbxrefs):
logger.debug('Loading {0} via external provider'.format(dbxrefs))
......@@ -40,3 +63,43 @@ def load_with_internal_provider(provider, dbxrefs):
def toString(dbxref):
    """Return the textual form '<db>:<id>' of a dbxref dict."""
    database, identifier = dbxref['db'], dbxref['id']
    return '{}:{}'.format(database, identifier)
def init_cache():
    """Open the disk cache located in the user cache directory for 'dbxref'."""
    return Cache(user_cache_dir('dbxref'))
def cache_entries(cache, entries, expiration_time=86400):
    """Store every entry in the cache under its 'id' value.

    cache           -- cache object offering ``set(key, value, expire=...)``
    entries         -- iterable of entry dicts, each with an 'id' key
    expiration_time -- lifetime of a cached entry in seconds; defaults to one day
    """
    for e in entries:
        logger.debug('Caching {}'.format(e['id']))
        # use the configured lifetime instead of a hard-coded 30 seconds
        cache.set(e['id'], e, expire=expiration_time)
def find_cached_entries(cache, dbxrefs):
    """Split dbxrefs into those with a cached entry and those without.

    cache   -- cache object offering ``get(key, default)``
    dbxrefs -- iterable of dbxref dicts; the cache key is their string form

    Returns a tuple ``(cached, uncached)``: ``cached`` holds the entries read
    from the cache, ``uncached`` the dbxrefs that still have to be loaded.
    """
    # unique marker, so that any stored value can be told apart from "absent"
    missing = object()
    cached = []
    uncached = []
    for d in dbxrefs:
        key = toString(d)
        # a single lookup: an entry that expires between a membership test and
        # the read would otherwise raise a KeyError
        entry = cache.get(key, missing)
        if entry is missing:
            uncached.append(d)
        else:
            logger.debug("Found {} in cache".format(key))
            cached.append(entry)
    return (cached, uncached)
def load_uncached_entries(dbxrefs):
    """Load the entries for the given dbxrefs through their configured providers.

    Consecutive dbxrefs with the same 'db' are handed to the provider as one
    batch, so the input is expected to be ordered by 'db'. Dbxrefs whose 'db'
    has no provider yield a placeholder entry with the status 'not supported'.

    Raises an Exception if a provider declares a retriever type other than
    'external' or 'internal'.
    """
    loaded = []
    for db, group in groupby(dbxrefs, lambda x: x['db']):
        if not config.has_provider(db):
            logger.debug('{0} is not supported'.format(db))
            loaded.extend({'id': toString(ref), 'status': 'not supported'} for ref in group)
            continue

        provider = config.get_provider(db)
        logger.debug('{0} is supported'.format(db))
        retriever_type = provider['retriever']['type']
        if retriever_type == 'external':
            loaded.extend(load_with_external_provider(provider, list(group)))
        elif retriever_type == 'internal':
            loaded.extend(load_with_internal_provider(provider, list(group)))
        else:
            raise Exception('Unknown retriever type', retriever_type)
    return loaded
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment