Commit 730fd9f0 authored by Lukas Jelonek

Add cache for entries

parent cdd8d6fe
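
This commit adds an `--ignore_cache`/`-C` option to the `retrieve` subcommand and threads it through to `dbxref.retriever.retrieve` as a keyword argument. A minimal sketch of the equivalent programmatic call, assuming `resolver` is importable from the `dbxref` package as the CLI code suggests (the GO identifier is only an example):

```python
from dbxref import resolver, retriever

# Convert raw dbxref strings into {'db': ..., 'id': ...} dicts, then fetch them.
# ignore_cache=True skips cache lookups; fetched entries are still written back to the cache.
dbxrefs = resolver.convert_to_dbxrefs(['GO:0006915'])  # example identifier
entries = retriever.retrieve(dbxrefs, ignore_cache=True)
```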
@@ -26,6 +26,7 @@ def main():
     retrieve_parser = subparsers.add_parser('retrieve')
     retrieve_parser.set_defaults(func=retrieve)
     retrieve_parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
+    retrieve_parser.add_argument('--ignore_cache', '-C', action='store_true', default=False, help="Ignore entries from the cache. Fetched entries are still stored in the cache.")
     retrieve_parser.add_argument('--verbose', '-v', action='store_true', default=False, help="Show debug output")
     args = parser.parse_args()
@@ -53,7 +54,14 @@ def resolve(args, config):
 def retrieve(args, config):
     from dbxref import retriever
-    print(json.dumps(retriever.retrieve(resolver.convert_to_dbxrefs(args.dbxrefs))))
+    print(
+        json.dumps(
+            retriever.retrieve(
+                resolver.convert_to_dbxrefs(args.dbxrefs),
+                ignore_cache=args.ignore_cache
+            )
+        )
+    )
 
 if __name__ == "__main__":
     main()
 import logging
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
 from dbxref import config
 from itertools import groupby
+from diskcache import Cache
+from appdirs import user_cache_dir
 import json
 
-def retrieve(dbxrefs):
-    sorted(dbxrefs, key=lambda x: x['db'].lower()) # normalize db to lowercase to allow differently cased notations
-    results = []
-    for key, dbxrefs in groupby(dbxrefs, lambda x: x['db']):
-        if config.has_provider(key):
-            provider = config.get_provider(key)
-            logger.debug('{0} is supported'.format(key))
-            if provider['retriever']['type'] == 'external':
-                results.extend(load_with_external_provider(provider, list(dbxrefs)))
-            elif provider['retriever']['type'] == 'internal':
-                results.extend(load_with_internal_provider(provider, list(dbxrefs)))
-            else:
-                raise Exception('Unknown retriever type', provider['retriever']['type'])
-        else:
-            logger.debug('{0} is not supported'.format(key))
-            results.extend(map(lambda x: {'id': toString(x), 'status': 'not supported'}, dbxrefs))
-    return (results)
+def retrieve(dbxrefs, ignore_cache=False):
+    cache = init_cache()
+    # normalize db notation
+    normalize_db_notation(dbxrefs)
+    dbxrefs = sorted(dbxrefs, key=lambda x: x['db'])
+    # lookup from cache
+    uncached = []
+    cached = []
+    if ignore_cache:
+        uncached = dbxrefs
+    else:
+        (cached, uncached) = find_cached_entries(cache, dbxrefs)
+    # load uncached
+    loaded_uncached = load_uncached_entries(uncached)
+    cache_entries(cache, loaded_uncached)
+    # compile results
+    results = []
+    results.extend(cached)
+    results.extend(loaded_uncached)
+    return results
+
+def normalize_db_notation(dbxrefs):
+    # take first prefix that matches the db
+    for dbxref in dbxrefs:
+        key = dbxref['db']
+        if config.has_provider(key):
+            provider = config.get_provider(key)
+            for prefix in provider['prefixes']:
+                if key.lower() == prefix.lower():
+                    dbxref['db'] = prefix
+            logger.debug("'{}' -> '{}'".format(key, dbxref['db']))
 
 def load_with_external_provider(provider, dbxrefs):
     logger.debug('Loading {0} via external provider'.format(dbxrefs))
@@ -40,3 +63,43 @@ def load_with_internal_provider(provider, dbxrefs):
 def toString(dbxref):
     return '{}:{}'.format(dbxref['db'], dbxref['id'])
+
+def init_cache():
+    cachedir = user_cache_dir('dbxref')
+    cache = Cache(cachedir)
+    return cache
+
+def cache_entries(cache, entries):
+    expiration_time = 86400  # one day
+    for e in entries:
+        logger.debug('Caching {}'.format(e['id']))
+        cache.set(e['id'], e, expire=expiration_time)
+
+def find_cached_entries(cache, dbxrefs):
+    cached = []
+    uncached = []
+    for d in dbxrefs:
+        key = toString(d)
+        if key in cache:
+            logger.debug("Found {} in cache".format(key))
+            cached.append(cache[key])
+        else:
+            uncached.append(d)
+    return (cached, uncached)
+
+def load_uncached_entries(dbxrefs):
+    results = []
+    # groupby requires input sorted by the grouping key; retrieve() sorts by 'db' first
+    for key, entries in groupby(dbxrefs, lambda x: x['db']):
+        if config.has_provider(key):
+            provider = config.get_provider(key)
+            logger.debug('{0} is supported'.format(key))
+            if provider['retriever']['type'] == 'external':
+                results.extend(load_with_external_provider(provider, list(entries)))
+            elif provider['retriever']['type'] == 'internal':
+                results.extend(load_with_internal_provider(provider, list(entries)))
+            else:
+                raise Exception('Unknown retriever type', provider['retriever']['type'])
+        else:
+            logger.debug('{0} is not supported'.format(key))
+            results.extend(map(lambda x: {'id': toString(x), 'status': 'not supported'}, entries))
+    return results
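
The helpers above rely on the dict-like API of `diskcache.Cache`: `set()` with a per-entry `expire` in seconds, the `in` operator for membership tests, and item access for reads. A minimal self-contained sketch of that behavior, with an illustrative cache directory and key (the real code derives the directory via `user_cache_dir('dbxref')`):

```python
from diskcache import Cache

cache = Cache('/tmp/dbxref-cache-demo')  # illustrative directory, not the one used by dbxref
cache.set('GO:0006915', {'id': 'GO:0006915', 'status': 'ok'}, expire=86400)  # evicted after one day

# Membership test and item access, as used in find_cached_entries above.
if 'GO:0006915' in cache:
    entry = cache['GO:0006915']

cache.close()
```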
@@ -4,3 +4,5 @@ pyyaml
 lockfile
 lxml
 pbr
+diskcache
+appdirs