Commit c7544bbd authored by lmueller
Browse files

retriever return an entry with id and error message if an invalid dbxref is...

Retrievers now return an entry with the id and an error message if an invalid dbxref is given, instead of returning an entry with the id only.
parent 2fc97bf6
......@@ -13,8 +13,8 @@ def retrieve(strings, location=''):
sorted(dbxrefs, key=lambda x: x['db'])
results = []
for key, dbxrefs in groupby(dbxrefs, lambda x: x['db']):
if key in providers and 'retriever' in providers[key]:
provider = providers[key]
if key.lower() in providers and 'retriever' in providers[key.lower()]:
provider = providers[key.lower()]
logger.debug('{0} is supported'.format(key))
if provider['retriever']['type'] == 'external':
retrieved = load_with_external_provider(provider, list(dbxrefs), location)
......
......@@ -53,7 +53,7 @@
html: ["http://www.ncbi.nlm.nih.gov/pubmed/%i"]
check_existence: "http://www.ncbi.nlm.nih.gov/pubmed/%i"
- name: Protein Families
prefixes: ["PFAM", "pfam"]
prefixes: ["PFAM", "Pfam", "pfam"]
resources:
html: ["http://pfam.xfam.org/family/%i"]
xml: ["http://pfam.xfam.org/family/%i?output=xml"]
......
......@@ -8,6 +8,7 @@ import logging
import json
import argparse
import re
from bs4 import BeautifulSoup as BS
logger = logging.getLogger(__name__)
......@@ -29,78 +30,87 @@ def main():
refs = []
comment = ""
reaction = ""
for line in lines:
line_elements = line.strip().split(' ')
if line_elements[0] == 'DE':
output['name'] = line_elements[1]
if line_elements[0] == 'AN':
if 'alternative_names' in output:
output['alternative_names'].append(line_elements[1])
else:
output['alternative_names'] = [line_elements[1]]
if line_elements[0] == 'CA':
if re.match(re.compile('^\(\d+\) '), line_elements[1]):
if len(reaction) == 0:
reaction += line_elements[1][line_elements[1].find(' ')+1:]
ls = r.text.replace('\n', ' ')
soup = BS(ls, 'lxml')
if soup.find('title') is not None:
output['message'] = soup.head.title.string
if output['message'] == '500 Internal Server Error':
output['message'] += '; probably invalid ID'
else:
for line in lines:
line_elements = line.strip().split(' ')
if line_elements[0] == 'DE':
output['name'] = line_elements[1]
if line_elements[0] == 'AN':
if 'alternative_names' in output:
output['alternative_names'].append(line_elements[1])
else:
if 'reaction_catalyzed' in output:
output['reaction_catalyzed'].append(reaction)
output['alternative_names'] = [line_elements[1]]
if line_elements[0] == 'CA':
if re.match(re.compile('^\(\d+\) '), line_elements[1]):
if len(reaction) == 0:
reaction += line_elements[1][line_elements[1].find(' ')+1:]
else:
output['reaction_catalyzed'] = [reaction]
reaction = line_elements[1][line_elements[1].find(' ')+1:]
else:
if len(reaction) == 0:
reaction = line_elements[1]
if 'reaction_catalyzed' in output:
output['reaction_catalyzed'].append(reaction)
else:
output['reaction_catalyzed'] = [reaction]
reaction = line_elements[1][line_elements[1].find(' ')+1:]
else:
reaction = reaction + " " + line_elements[1]
if line_elements[0] == 'CF':
if 'cofactors' in output:
output['cofactors'].append(line_elements[1])
else:
output['cofactors'] = [line_elements[1]]
if line_elements[0] == 'CC':
if "-!-" in line_elements[1]:
if len(comment) == 0:
comment += line_elements[1][4:]
if len(reaction) == 0:
reaction = line_elements[1]
else:
reaction = reaction + " " + line_elements[1]
if line_elements[0] == 'CF':
if 'cofactors' in output:
output['cofactors'].append(line_elements[1])
else:
if 'comments' in output:
output['comments'].append(comment)
output['cofactors'] = [line_elements[1]]
if line_elements[0] == 'CC':
if "-!-" in line_elements[1]:
if len(comment) == 0:
comment += line_elements[1][4:]
else:
output['comments'] = [comment]
comment = line_elements[1][4:]
if 'comments' in output:
output['comments'].append(comment)
else:
output['comments'] = [comment]
comment = line_elements[1][4:]
else:
comment += line_elements[2]
if line_elements[0] == 'PR':
link = line_elements[1].replace(';', '').split()
if 'prosite' in output:
output['prosite'].append(link[1])
else:
output['prosite'] = [link[1]]
if line_elements[0] == 'DR':
for i in range(1, len(line_elements)):
for e in line_elements[i].split('; '):
if len(e) > 1:
l = e.split(', ')
l[1] = l[1].replace(' ', '')
l[1] = l[1].replace(';', '')
refs.append('UniProtKB/Swiss-Prot:' + l[0])
output['dbxrefs'] = refs
if len(reaction) > 0:
if 'reaction_catalyzed' in output:
output['reaction_catalyzed'].append(reaction)
else:
comment += line_elements[2]
if line_elements[0] == 'PR':
link = line_elements[1].replace(';', '').split()
if 'prosite' in output:
output['prosite'].append(link[1])
output['reaction_catalyzed'] = [reaction]
if len(comment) > 0:
if 'comments' in output:
output['comments'].append(comment)
else:
output['prosite'] = [link[1]]
if line_elements[0] == 'DR':
for i in range(1, len(line_elements)):
for e in line_elements[i].split('; '):
if len(e) > 1:
l = e.split(', ')
l[1] = l[1].replace(' ', '')
l[1] = l[1].replace(';', '')
refs.append('UniProtKB/Swiss-Prot:' + l[0])
output['dbxrefs'] = refs
if len(reaction) > 0:
if 'reaction_catalyzed' in output:
output['reaction_catalyzed'].append(reaction)
else:
output['reaction_catalyzed'] = [reaction]
if len(comment) > 0:
if 'comments' in output:
output['comments'].append(comment)
else:
output['comments'] = [comment]
output['comments'] = [comment]
documents.append(format_output(output, args))
print(json.dumps(documents))
def read_basic(d):
out = {}
definition = {}
if 'message' in d:
out['message'] = d['message']
if 'name' in d:
out['name'] = d['name']
if 'alternative_names' in d:
......@@ -125,7 +135,8 @@ def format_output(d, args):
out['dbxrefs'] = d['dbxrefs']
if not args.basic and not args.references:
out.update(read_basic(d))
out['dbxrefs'] = d['dbxrefs']
if 'dbxrefs' in d:
out['dbxrefs'] = d['dbxrefs']
return (out)
main()
......@@ -25,7 +25,9 @@ def main():
logger.debug('Content: %s', r.text)
d = json.loads(r.text)
output = {'id': entry['dbxref']}
if not 'messages' in d:
if 'messages' in d:
output['message'] = '; '.join(d['messages'])
else:
if args.basic:
output.update(read_basic(d))
if args.relations:
......@@ -33,8 +35,6 @@ def main():
if not args.basic and not args.relations:
output.update(read_basic(d))
output.update(read_relations(d))
else:
output['messages'] = d['messages']
documents.append(output)
print (json.dumps(documents))
......
......@@ -34,15 +34,18 @@ def main():
output = {'id': entry['dbxref']}
for child in root.findall('pfam:entry', ns):
if args.basic:
output.update(read_basic(child))
if args.annotation:
output.update(read_annotation(child))
tree = str(ET.tostring(root))
if '<error>' in tree:
output['message'] = tree[tree.find('<error>')+7:tree.rfind('</error>')]
else:
for child in root.findall('pfam:entry', ns):
if args.basic:
output.update(read_basic(child))
if args.annotation:
output.update(read_annotation(child))
documents.append(output)
print(json.dumps(documents))
def read_basic(entry):
    """Return the description of a ``pfam:entry`` element.

    Looks up the ``pfam:description`` child of *entry* using the
    module-level ``ns`` namespace mapping and returns its text, with
    leading and trailing whitespace removed, under the ``'description'``
    key of a new dict.
    """
    description_node = entry.find('pfam:description', ns)
    return {'description': description_node.text.strip()}
......@@ -60,8 +63,8 @@ def read_annotation(entry):
terms = category.findall('pfam:term', ns)
for term in terms:
annotation['terms'].append({
'id': term.attrib['go_id'],
'description': term.text
'id': term.attrib['go_id'],
'description': term.text
})
return annotation
......
......@@ -7,7 +7,7 @@ import requests
import logging
import json
import argparse
from lxml import etree
from bs4 import BeautifulSoup as BS
logger = logging.getLogger(__name__)
......@@ -39,7 +39,12 @@ def main():
else:
elements.append(line.strip())
output = {'id': entry['dbxref']}
if not '<title>500 Internal Server Error</title>' in elements:
soup = BS(r.text.replace('\n', ' '), 'lxml')
if soup.find('title') is not None:
output['message'] = soup.head.title.string
if output['message'] == '500 Internal Server Error':
output['message'] += '; probably invalid ID'
else:
d = resolve_elements(elements)
if 'id' in d and d['id'] == entry['dbxref'] and args.basic:
output.update(format_output(d))
......
......@@ -4,6 +4,7 @@ import dbxref.config
import dbxref.resolver
import requests
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as BS
import logging
import json
import argparse
......@@ -54,7 +55,15 @@ def main():
if args.features:
output['features'] = read_features(child)
except:
pass
soup = BS
try:
soup = BS(r.text.replace('\n', ' '), 'lxml')
output['message'] = 'an error occurred'
for child in soup.findAll('div'):
if child.get('id') == 'noResultsMessage':
output['message'] = 'no results found; probably invalid ID'
except:
pass
documents.append(output)
print(json.dumps(documents))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment