Commit fd5f86b2 authored by lmueller

added --basic option for enzyme, GO and SO retriever; added --relations option for GO and SO retriever; added --references option for enzyme retriever
parent b13e0b04
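
The new flags on the three retriever scripts are plain argparse store_true options. A minimal sketch of how they combine, mirroring the enzyme retriever's parser from the diff below; the example EC number and the commented script name are illustrative assumptions, not part of this commit:

import argparse

# Mirrors the argparse setup this commit adds to the enzyme retriever.
parser = argparse.ArgumentParser(description='Retrieve enzyme text documents for dbxrefs and convert them into json')
parser.add_argument('--basic', '-b', action='store_true', help='Include dbxref, definition, name and synonyms')
parser.add_argument('--references', '-r', action='store_true', help='Include uniprot dbxrefs')
parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)

# Hypothetical invocation: retrieve_enzyme.py --basic --references EC:1.1.1.1
args = parser.parse_args(['--basic', '--references', 'EC:1.1.1.1'])
assert args.basic and args.references and args.dbxrefs == ['EC:1.1.1.1']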
@@ -34,7 +34,7 @@
resources:
html: ["http://www.sequenceontology.org/browser/current_svn/term/SO:%i"]
obo: ["http://www.sequenceontology.org/browser/current_svn/export/term_only/obo/SO:%i"]
csv: ["http://www.sequenceontology.org/browser/current_svn/export/term_only/csv_text/SO:%i"]
tsv: ["http://www.sequenceontology.org/browser/current_svn/export/term_only/csv_text/SO:%i"]
# does not work
# check_existence: "http://www.sequenceontology.org/browser/current_svn/term/SO:%i"
retriever:
@@ -83,7 +83,7 @@
resources:
html: ["http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i"]
xml: ["http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i&format=oboxml"]
json: ["https://www.ebi.ac.uk/QuickGO/services/ontology/go/terms/GO:%i"]
json: ["https://www.ebi.ac.uk/QuickGO/services/ontology/go/terms/GO:%i/complete"]
check_existence: "http://purl.obolibrary.org/obo/GO_%i"
retriever:
type: 'external'
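
The GO 'json' resource now points at the /complete variant of the QuickGO terms endpoint, which carries the name, definition, synonyms, children and history fields that the gene ontology retriever below reads. A minimal fetch sketch, assuming GO:0008150 only as an illustrative term ID:

import json
import requests

# URL pattern taken from the config hunk above; the term ID is an assumption.
url = 'https://www.ebi.ac.uk/QuickGO/services/ontology/go/terms/GO:0008150/complete'
r = requests.get(url)
term = json.loads(r.text)['results'][0]
# These are the fields the retriever extracts from the response.
print(term['name'], term['definition']['text'], len(term.get('children', [])))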
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
@@ -12,6 +13,8 @@ logger = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser(description='Retrieve enzyme text documents for dbxrefs and convert them into json')
parser.add_argument('--basic', '-b', action='store_true', help='Include dbxref, definition, name and synonyms')
parser.add_argument('--references', '-r', action='store_true', help='Include uniprot dbxrefs')
parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
args = parser.parse_args()
resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
@@ -80,9 +83,9 @@ def main():
l = e.split(', ')
l[1] = l[1].replace(' ', '')
l[1] = l[1].replace(';', '')
refs.append(l)
refs.append(l[0])
if len(refs) > 0:
output['UniProtKB/Swiss-Prot'] = refs
output['uniprot'] = refs
if len(reaction) > 0:
if 'reaction_catalysed' in output:
output['reaction_catalysed'].append(reaction)
@@ -93,28 +96,29 @@ def main():
output['comments'].append(comment)
else:
output['comments'] = [comment]
documents.append(format_output(output))
documents.append(format_output(output, args))
print(json.dumps(documents))
def format_output(d):
def format_output(d, args):
out = {'dbxref': d['dbxref']}
definition = {}
if 'name' in d:
out['name'] = d['name']
if 'alternative_names' in d:
out['synonyms'] = d.pop('alternative_names')
if 'UniProtKB/Swiss-Prot' in d:
out['UniProtKB/Swiss-Prot'] = d['UniProtKB/Swiss-Prot']
if 'reaction_catalysed' in d:
definition['reaction_catalysed'] = d['reaction_catalysed']
if 'cofactors' in d:
definition['cofactors'] = d['cofactors']
if 'comments' in d:
definition['comments'] = d['comments']
if len(definition) == 1:
out['deifinition'] = definition[0]
elif len(definition) > 1:
out['deifinition'] = definition
if args.basic:
if 'name' in d:
out['name'] = d['name']
if 'alternative_names' in d:
out['synonyms'] = d.pop('alternative_names')
if 'reaction_catalysed' in d:
definition['reaction_catalysed'] = d['reaction_catalysed']
if 'cofactors' in d:
definition['cofactors'] = d['cofactors']
if 'comments' in d:
definition['comments'] = d['comments']
if len(definition) == 1:
out['deifinition'] = definition[0]
elif len(definition) > 1:
out['deifinition'] = definition
if 'uniprot' in d and args.references:
out['uniprot'] = d['uniprot']
return (out)
main()
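
The Swiss-Prot reference handling in the enzyme retriever now keeps only the UniProt accession from each cross-reference and stores the list under 'uniprot' instead of 'UniProtKB/Swiss-Prot'. A minimal sketch of that loop, assuming ENZYME-style entries such as 'P07327, ADH1A_HUMAN ;' (the sample accessions are illustrative, not from this commit):

# Sample cross-reference strings; the exact format is assumed from the parsing code above.
entries = ['P07327, ADH1A_HUMAN ;', 'P28469, ADH1A_MACMU ;']
refs = []
for e in entries:
    l = e.split(', ')
    l[1] = l[1].replace(' ', '').replace(';', '')  # entry name cleaned but no longer stored
    refs.append(l[0])                              # only the accession is kept now
output = {'uniprot': refs}                         # key renamed from 'UniProtKB/Swiss-Prot'
print(output)                                      # {'uniprot': ['P07327', 'P28469']}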
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
@@ -11,6 +12,8 @@ logger = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser(description='Retrieve gene ontology documents for dbxrefs and convert them into json')
parser.add_argument('--basic', '-b', action='store_true', help='Include dbxref, definition, name and synonyms')
parser.add_argument('--relations', '-r', action='store_true', help='Include dbxrefs and type of parent and children')
parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
args = parser.parse_args()
resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
@@ -20,13 +23,49 @@ def main():
logger.debug('URL: %s', json_url)
r = requests.get(json_url)
logger.debug('Content: %s', r.text)
output = {'dbxref': entry['dbxref']}
d = json.loads(r.text)
output['name'] = d['results'][0]['name']
output['synonyms'] = d['results'][0]['synonyms']
output['children'] = d['results'][0]['children']
output['definition'] = d['results'][0]['definition']['text']
output = {'dbxref': entry['dbxref']}
if args.basic:
output['definition'] = d['results'][0]['definition']['text']
output['name'] = d['results'][0]['name']
output['synonyms'] = d['results'][0]['synonyms']
if args.relations:
output['relations']= {'children': d['results'][0]['children']}
for child in output['relations']['children']:
child['type'] = child.pop('relation')
output['relations']['parent'] = parse_history(d['results'][0]['history'])
documents.append(output)
print (json.dumps(documents))
def parse_history(h):
out = []
for history in reversed(h):
if history['category'] == "RELATION":
if history['action'] == "Updated" or history['action'] == "Added":
out.append(history)
if history['action'] == "Deleted":
for i in reversed(range(len(out))):
if out[i]['text'] == history['text']:
del out[i]
break
for i in range(len(out)):
out[i] = parse_text(out[i]['text'])
return (out)
def parse_text(t):
words = t.split(' ')
type = ''
out = {}
for word in words:
if 'GO:' in word:
out['id'] = word
break
else:
if type == '':
type = word
else:
type += "_" + word
out['type'] = type
return (out)
main()
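
In the gene ontology retriever, parse_history collects the RELATION entries that were Added or Updated and drops those later marked Deleted; parse_text then turns each remaining free-text entry into an id/type pair. A standalone sketch of that parsing; the sample history text is an assumption about QuickGO's wording, used only to show the splitting logic:

# Copy of parse_text from the diff above, plus an illustrative call.
def parse_text(t):
    words = t.split(' ')
    type = ''
    out = {}
    for word in words:
        if 'GO:' in word:
            out['id'] = word
            break
        else:
            if type == '':
                type = word
            else:
                type += "_" + word
    out['type'] = type
    return (out)

print(parse_text('part of GO:0005622'))  # {'id': 'GO:0005622', 'type': 'part_of'}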
#!/usr/bin/env python3
import env
import dbxref.config
import dbxref.resolver
@@ -6,11 +7,14 @@ import requests
import logging
import json
import argparse
from lxml import etree
logger = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser(description='Retrieve sequence ontology csv documents for dbxrefs and convert them into json')
parser.add_argument('--basic', '-b', action='store_true', help='Include dbxref, definition, name and synonyms')
parser.add_argument('--relations', '-r', action='store_true', help='Include dbxrefs to parents and children')
parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
args = parser.parse_args()
resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
@@ -35,11 +39,29 @@ def main():
else:
elements.append(line.strip())
d = resolve_elements(elements)
if 'id' in d and d['id'] == entry['dbxref']:
output = format_output(d)
output = {'dbxref': entry['dbxref']}
if 'id' in d and d['id'] == entry['dbxref'] and args.basic:
output.update(format_output(d))
if args.relations:
output['relations'] = resolve_relations(entry)
documents.append(output)
print (json.dumps(documents))
def resolve_relations(entry):
tsv_url = entry['locations']['tsv'][0]
r = requests.get(tsv_url)
lines = r.text.strip().split('\n')
lines[0] = lines[0].split('\t')
lines[1] = lines[1].split('\t')
dic = {'parent': []}
if lines[1][3] != '':
dic['parent'] = lines[1][3].split(',')
if len(lines[1]) == 5:
dic['children'] = lines[1][4].split(',')
else:
dic['children'] = []
return (dic)
def resolve_elements(es):
dict = {}
for element in es:
@@ -55,8 +77,6 @@ def resolve_elements(es):
def format_output(d):
out = {}
if 'id' in d:
out['dbxref'] = d['id']
if 'def' in d:
de = d['def'].split('" ')
de = de[0].replace('"', '')
@@ -72,10 +92,6 @@ def format_output(d):
out['namespace'] = d['namespace']
else:
out['namespace'] = ""
if 'is_a' in d:
out['parent'] = d['is_a']
else:
out['parent'] = ""
if 'synonym' in d:
out['synonyms'] = []
for synonym in d['synonym']:
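
The new resolve_relations helper in the sequence ontology retriever assumes the browser's tsv export delivers one header row and one data row, with comma-separated parent IDs in the fourth column and an optional fifth column of children. A self-contained sketch of that parsing; the column names and SO IDs below are made up for illustration:

# Illustrative tsv payload; the real export layout is only inferred from resolve_relations above.
text = ('accession\tname\tdefinition\tparents\tchildren\n'
        'SO:0000110\tsequence_feature\t...\tSO:0000400,SO:0001260\tSO:0000001,SO:0000004')
lines = text.strip().split('\n')
lines[0] = lines[0].split('\t')
lines[1] = lines[1].split('\t')
dic = {'parent': []}
if lines[1][3] != '':
    dic['parent'] = lines[1][3].split(',')
if len(lines[1]) == 5:
    dic['children'] = lines[1][4].split(',')
else:
    dic['children'] = []
print(dic)  # {'parent': ['SO:0000400', 'SO:0001260'], 'children': ['SO:0000001', 'SO:0000004']}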