Commit fce0ffe2 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Fix bug: dbxref crashes when a pfam entry is returned as html

parent 865554e1
......@@ -2,6 +2,7 @@
import dbxref.resolver
import requests
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
import logging
import json
import argparse
......@@ -35,11 +36,12 @@ def retrieve(dbxrefs, basic=True, annotation=True):
logger.debug('URL: %s', xml_url)
r = requests.get(xml_url)
logger.debug('Content: %s', r.text)
root = ET.fromstring(r.text)
output = {'id': entry['dbxref']}
try:
root = ET.fromstring(r.text)
tree = str(ET.tostring(root))
if '<error>' in tree:
output['message'] = tree[tree.find('<error>')+7:tree.rfind('</error>')]
......@@ -52,7 +54,7 @@ def retrieve(dbxrefs, basic=True, annotation=True):
except (KeyError, AttributeError) as e:
logger.warn('Error in retrieving %s', str(entry))
raise
except RuntimeError as e:
except (ParseError, RuntimeError) as e:
output['message'] = 'an error occurred'
try:
html = HTML.document_fromstring(r.text.replace('\n', ' '))
......
......@@ -8,3 +8,11 @@ class TestPfam(unittest.TestCase):
documents = pfam.retrieve([{'db': 'PFAM', 'id': 'PF00083.23'}])
# This test used to fail because of missing None handling, so there are
# no assertions here: once fixed, running without an error should suffice.
def test_renamed_family(self):
'''regression test for a renamed pfam family that is answered with an html redirect'''
documents = pfam.retrieve([{'db': 'PFAM', 'id': 'Tiny_TM_bacill'}])
# This test used to fail because of a redirect that is issued when a
# family has been renamed. Unfortunately the redirect was not encoded in
# the HTTP headers but in HTML markup
# (<meta http-equiv="Refresh" content="5; URL=/family/PF09680" />).
# No assertions here: once fixed, retrieving without an error should suffice.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment