Commit 011d0be3 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Fix bug: a long HTML message is added to the JSON when an EC number is not available

parent 5d8111f1
......@@ -35,25 +35,16 @@ def retrieve(dbxrefs, basic=True, references=True):
txt_url = entry['locations']['text'][0]
logger.debug('URL: %s', txt_url)
r = requests.get(txt_url)
logger.debug('Content: %s', r.text)
try:
# We expect a plain text document
# Check whether the returned document is an HTML document.
# If it is, something went wrong and we assume that
# it is an error page.
ls = r.text.replace('\n', ' ')
html = HTML.document_fromstring(ls).head.text_content()
# when everything is fine an exception was thrown for
# the last line
output = {'dbxref': entry['dbxref']}
output['message'] = html
if output['message'] == ' 500 Internal Server Error ':
output['message'] = '500 Internal Server Error; probably invalid ID'
documents.append(output)
except:
retrieved_entry = {}
if r.status_code < 400:
retrieved_entry = parse_flat_file(r.text)
retrieved_entry['dbxref'] = entry['dbxref']
documents.append(retrieved_entry)
elif r.status_code == 404:
retrieved_entry = {'status' : '404 Not found'}
else:
retrieved_entry = {'status' : r.status_code}
retrieved_entry['dbxref'] = entry['dbxref']
documents.append(retrieved_entry)
return documents
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment