Commit dc085e1f authored by lmueller's avatar lmueller
Browse files

created retriever for enzyme, GO and SO

parent 4da4423e
# Resolver entry for ExPASy ENZYME records.
# NOTE(review): the scraped diff showed both the old http and the new https
# lines without +/- markers, producing duplicate "html"/"check_existence"
# keys; only the https versions (the added lines) are kept here.
- name: Enzyme
  prefixes: [EC, Enzyme]
  resources:
    html: ["https://enzyme.expasy.org/EC/%i"]
    text: ["https://enzyme.expasy.org/EC/%i.txt"]
    check_existence: "https://enzyme.expasy.org/EC/%i.txt"
  retriever:
    type: 'external'
    location: 'scripts/retrieve_enzyme.py'
- name: Gene Identifier
prefixes: [GI]
resources:
......@@ -29,8 +33,12 @@
prefixes: ["SO"]
resources:
html: ["http://www.sequenceontology.org/browser/current_svn/term/SO:%i"]
csv: ["http://www.sequenceontology.org/browser/current_svn/export/term_only/csv_text/SO:%i"]
# does not work
# check_existence: "http://www.sequenceontology.org/browser/current_svn/term/SO:%i"
retriever:
type: 'external'
location: 'scripts/retrieve_sequence_ontology.py'
- name: RFAM
prefixes: ["RFAM"]
resources:
......@@ -74,7 +82,11 @@
resources:
html: ["http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i"]
xml: ["http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i&format=oboxml"]
json: ["https://www.ebi.ac.uk/QuickGO/services/ontology/go/terms/GO:%i"]
check_existence: "http://purl.obolibrary.org/obo/GO_%i"
retriever:
type: 'external'
location: 'scripts/retrieve_gene_ontology.py'
- name: HTTP
prefixes: ["http", "https"]
resources:
......
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
import requests
import json
import argparse
import re
def main():
    """Retrieve ENZYME flat-file records for dbxrefs and print them as JSON.

    Downloads the plain-text record for every resolved dbxref, parses the
    two-letter line codes of the ENZYME format (DE, AN, CA, CF, CC, PR, DR)
    and writes one JSON document per dbxref to stdout as a single list.
    """
    parser = argparse.ArgumentParser(description='Retrieve enzyme text documents for dbxrefs and convert them into json')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()
    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    # "(n) " prefix marks the start of a numbered reaction on a CA line.
    # Compiled once here (raw string) instead of re.compile on every line.
    numbered_reaction = re.compile(r'^\(\d+\) ')
    documents = []
    for entry in resolved:
        txt_url = entry['locations']['text'][0]
        r = requests.get(txt_url)
        lines = r.text.split('\n')
        output = {'dbxref': entry['dbxref']}
        refs = []
        comment = ""
        reaction = ""
        for line in lines:
            # ENZYME lines are "<code>   <content>": a two-letter line code
            # separated from the content by three spaces.
            elements = line.strip().split('   ')
            code = elements[0]
            if code == 'DE':
                output['name'] = elements[1]
            if code == 'AN':
                output.setdefault('alternative_names', []).append(elements[1])
            if code == 'CA':
                if numbered_reaction.match(elements[1]):
                    if len(reaction) == 0:
                        # First reaction: drop the leading "(n) " counter.
                        reaction += elements[1][elements[1].find(' ') + 1:]
                    else:
                        # A new numbered reaction starts: flush the previous one.
                        output.setdefault('reaction_catalysed', []).append(reaction)
                        reaction = elements[1][elements[1].find(' ') + 1:]
                else:
                    # Continuation line of the current (possibly unnumbered) reaction.
                    if len(reaction) == 0:
                        reaction = elements[1]
                    else:
                        reaction = reaction + " " + elements[1]
            if code == 'CF':
                output.setdefault('cofactors', []).append(elements[1])
            if code == 'CC':
                if "-!-" in elements[1]:
                    if len(comment) == 0:
                        comment += elements[1][4:]  # strip the "-!- " marker
                    else:
                        # New "-!-" bullet: flush the finished comment first.
                        output.setdefault('comments', []).append(comment)
                        comment = elements[1][4:]
                else:
                    # Continuation line; the deeper indentation leaves the text
                    # in elements[2] (leading space preserved by the split).
                    comment += elements[2]
            if code == 'PR':
                # "PR   PROSITE; PDOC00000;" -> keep the PROSITE accession.
                link = elements[1].replace(';', '').split()
                output.setdefault('prosite', []).append(link[1])
            if code == 'DR':
                # "DR   P00000, NAME_SPECIES ;  ..." -> [accession, entry name] pairs.
                for element in elements[1:]:
                    for item in element.split('; '):
                        if len(item) > 1:
                            pair = item.split(', ')
                            pair[1] = pair[1].replace(' ', '')
                            pair[1] = pair[1].replace(';', '')
                            refs.append(pair)
        # Flush whatever is still buffered when the record ends.
        if len(refs) > 0:
            output['UniProtKB/Swiss-Prot'] = refs
        if len(reaction) > 0:
            output.setdefault('reaction_catalysed', []).append(reaction)
        if len(comment) > 0:
            output.setdefault('comments', []).append(comment)
        documents.append(output)
    print(json.dumps(documents))


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
import requests
import json
import argparse
def main():
    """Fetch the QuickGO JSON record for each GO dbxref and print them as JSON."""
    parser = argparse.ArgumentParser(description='Retrieve gene ontology documents for dbxrefs and convert them into json')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()
    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    documents = []
    for entry in resolved:
        # Each resolved entry carries the concrete URLs for its resources;
        # take the first JSON location and fetch it.
        url = entry['locations']['json'][0]
        response = requests.get(url)
        payload = json.loads(response.text)
        documents.append({'dbxref': entry['dbxref'], 'results': payload['results']})
    print(json.dumps(documents))


main()
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
import requests
import json
import argparse
def main():
    """Fetch the Sequence Ontology CSV export for each dbxref and print JSON."""
    parser = argparse.ArgumentParser(description='Retrieve sequence ontology csv documents for dbxrefs and convert them into json')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()
    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    documents = []
    for entry in resolved:
        url = entry['locations']['csv'][0]
        response = requests.get(url)
        # Flatten the tab-separated export (header row + data row) into one
        # field list; fields 7-9 hold name, definition and parent.
        fields = response.text.strip().replace('\n', '\t').split('\t')
        documents.append({
            'dbxref': entry['dbxref'],
            'name': fields[7],
            'definition': fields[8],
            'parent': fields[9],
        })
    print(json.dumps(documents))


main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment