Commit fce0ffe2 authored by Lukas Jelonek
Browse files

Fix bug: dbxref crashes when a pfam entry is returned as html

parent 865554e1
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import dbxref.resolver import dbxref.resolver
import requests import requests
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
import logging import logging
import json import json
import argparse import argparse
...@@ -35,11 +36,12 @@ def retrieve(dbxrefs, basic=True, annotation=True): ...@@ -35,11 +36,12 @@ def retrieve(dbxrefs, basic=True, annotation=True):
logger.debug('URL: %s', xml_url) logger.debug('URL: %s', xml_url)
r = requests.get(xml_url) r = requests.get(xml_url)
logger.debug('Content: %s', r.text) logger.debug('Content: %s', r.text)
root = ET.fromstring(r.text)
output = {'id': entry['dbxref']} output = {'id': entry['dbxref']}
try: try:
root = ET.fromstring(r.text)
tree = str(ET.tostring(root)) tree = str(ET.tostring(root))
if '<error>' in tree: if '<error>' in tree:
output['message'] = tree[tree.find('<error>')+7:tree.rfind('</error>')] output['message'] = tree[tree.find('<error>')+7:tree.rfind('</error>')]
...@@ -52,7 +54,7 @@ def retrieve(dbxrefs, basic=True, annotation=True): ...@@ -52,7 +54,7 @@ def retrieve(dbxrefs, basic=True, annotation=True):
except (KeyError, AttributeError) as e: except (KeyError, AttributeError) as e:
logger.warn('Error in retrieving %s', str(entry)) logger.warn('Error in retrieving %s', str(entry))
raise raise
except RuntimeError as e: except (ParseError, RuntimeError) as e:
output['message'] = 'an error occurred' output['message'] = 'an error occurred'
try: try:
html = HTML.document_fromstring(r.text.replace('\n', ' ')) html = HTML.document_fromstring(r.text.replace('\n', ' '))
......
...@@ -8,3 +8,11 @@ class TestPfam(unittest.TestCase): ...@@ -8,3 +8,11 @@ class TestPfam(unittest.TestCase):
documents = pfam.retrieve([{'db': 'PFAM', 'id': 'PF00083.23'}]) documents = pfam.retrieve([{'db': 'PFAM', 'id': 'PF00083.23'}])
# this test failed due to an error due to missing None handling, # this test failed due to an error due to missing None handling,
# so no assertions here. Once fixed. this should suffice # so no assertions here. Once fixed. this should suffice
def test_renamed_family(self):
'''regression test for missing comment in pfam entry'''
documents = pfam.retrieve([{'db': 'PFAM', 'id': 'Tiny_TM_bacill'}])
# this test failed due to a redirect when a family was renamed
# unfortunately the redirect was not encoded in http headers, but in
# html markup (<meta http-equiv="Refresh" content="5; URL=/family/PF09680" />)
# so no assertions here. Once fixed. this should suffice
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment