Commit d60fa151 authored by lmueller's avatar lmueller
Browse files

switched from ncbi to ebi for taxonomy retriever

parent 7943203b
......@@ -27,6 +27,7 @@
prefixes: ["Taxon", "taxon", "taxid"]
resources:
html: ["http://www.uniprot.org/taxonomy/%i"]
json: ["https://www.ebi.ac.uk/ena/data/taxonomy/v1/taxon/tax-id/%i"]
xml: ["http://www.uniprot.org/taxonomy/%i.rdf"]
xml_ncbi: ["https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=%i"]
check_existence: "http://www.uniprot.org/taxonomy/%i"
......
......@@ -16,6 +16,9 @@ def main():
parser.add_argument('--relations', '-r', action='store_true', help='Include id, parents and children')
parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
args = parser.parse_args()
if not args.basic and not args.relations:
args.basic = True
args.relations = True
resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
documents = []
for entry in resolved:
......@@ -32,9 +35,6 @@ def main():
output.update(read_basic(d))
if args.relations:
output.update(read_relations(d))
if not args.basic and not args.relations:
output.update(read_basic(d))
output.update(read_relations(d))
documents.append(output)
print (json.dumps(documents))
......
......@@ -4,43 +4,62 @@ import env
import dbxref.config
import dbxref.resolver
import requests
import xml.etree.ElementTree as ET
import logging
import json
import argparse
logger = logging.getLogger(__name__)
ns = {'TaxaSet': 'https://eutils.ncbi.nlm.nih.gov/'}
def main():
    """Resolve taxonomy dbxrefs, fetch their JSON records and print them as JSON.

    Command-line flags select which groups of fields are copied into each
    output document:

    * ``--basic`` / ``-b``: scientificName, commonName, lineage and rank.
    * ``--geneticcodes`` / ``-g``: geneticCode and mitochondrialGeneticCode.

    When neither flag is given, both groups are included. Every remaining
    argument is treated as a dbxref to resolve.

    One document per resolved dbxref is collected and the whole list is
    printed to stdout as a single JSON array. A document always carries the
    dbxref under ``id``; if the response could not be used it carries a
    ``message`` instead of taxonomy fields.
    """
    parser = argparse.ArgumentParser(description='Retrieve taxonomy xml documents for dbxrefs and convert them into json')
    parser.add_argument('--basic', '-b', action='store_true', help='Include dbxref, scientificName, commonName, lineage and rank')
    parser.add_argument('--geneticcodes', '-g', action='store_true', help='Include geneticCode and mitochondrialGeneticCode')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # No selection flag at all means "give me everything".
    if not args.basic and not args.geneticcodes:
        args.basic = True
        args.geneticcodes = True

    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    documents = []
    for entry in resolved:
        # Use the first configured 'json' location for this dbxref.
        json_url = entry['locations']['json'][0]
        logger.debug('URL: %s', json_url)
        r = requests.get(json_url)
        logger.debug('Content: %s', r.text)
        output = {'id': entry['dbxref']}

        # A body that is not valid JSON is treated like an empty record so
        # that one bad dbxref does not abort the whole batch. Only decoding
        # errors are caught (json.JSONDecodeError is a ValueError).
        try:
            d = json.loads(r.text)
        except ValueError:
            d = {}

        # Only a non-empty JSON object is a usable record; scalars or lists
        # would otherwise break len() or the key lookups in the readers.
        if isinstance(d, dict) and d:
            if args.basic:
                output.update(read_basic(d))
            if args.geneticcodes:
                output.update(read_geneticCodes(d))
        else:
            output['message'] = "An error occurred! probably invalid ID"
        documents.append(output)
    print(json.dumps(documents))
def read_basic(d):
    """Extract the basic taxonomy fields from a decoded taxonomy record.

    Args:
        d: Mapping holding a decoded taxonomy record.

    Returns:
        A new dict containing whichever of ``scientificName``,
        ``commonName``, ``lineage`` and ``rank`` are present in ``d``,
        in that order. Keys missing from ``d`` are simply omitted.
    """
    basic_keys = ('scientificName', 'commonName', 'lineage', 'rank')
    return {key: d[key] for key in basic_keys if key in d}
def read_geneticCodes(d):
    """Extract the genetic-code fields from a decoded taxonomy record.

    Args:
        d: Mapping holding a decoded taxonomy record.

    Returns:
        A dict with the single key ``geneticCodes`` whose value is a dict
        containing whichever of ``geneticCode`` and
        ``mitochondrialGeneticCode`` are present in ``d``. The nested dict
        is empty when neither key is present.
    """
    code_keys = ('geneticCode', 'mitochondrialGeneticCode')
    return {'geneticCodes': {key: d[key] for key in code_keys if key in d}}
# Script entry point: main() is called unconditionally (there is no
# `if __name__ == "__main__"` guard), so it also runs if this file is imported.
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment