Commit e4a26419 authored by lmueller's avatar lmueller
Browse files

use retrieve function in dbxref retriever.py instead of calling every retriever script separately

parent e9e5fe63
......@@ -2,48 +2,29 @@
import json
import argparse
import os
import sys
import subprocess
# Command-line interface: every option below is required.
parser = argparse.ArgumentParser(description='Resolve dbxrefs from json file')
parser.add_argument('--input', '-i', required=True, help='The json input document')
# NOTE(review): '--scripts' and '--retriever' both register the short flag '-s'.
# This looks like the removed and the added side of the diff hunk shown
# together -- confirm that only one of the two is meant to remain.
parser.add_argument('--scripts', '-s', required=True, help='Location of the retriever scripts')
parser.add_argument('--retriever', '-s', required=True, help='Location of retriever')
parser.add_argument('--output', '-o', required=True, help='Name of the output document')
args = parser.parse_args()
# Remove every occurrence of 'scripts/' from the retriever location and put
# the result on the module search path so `retriever` can be imported below.
basename = args.retriever.replace('scripts/', '')
sys.path.append(basename)
# Plain string concatenation: this only names a 'dbxref' subdirectory when
# basename already ends with a path separator.
sys.path.append(basename + 'dbxref')
from retriever import retrieve
# Read the whole input document and parse it as JSON.
json_file = open(args.input).read()
j = json.loads(json_file)
def sort_refs(ref_list):
    """Group database cross-references by their database prefix.

    Each entry of ``ref_list`` is expected to be a string of the form
    ``"<db>:<id>"``, for example ``"GO:0008270"``.

    Args:
        ref_list: Iterable of cross-reference strings.

    Returns:
        A dict mapping each prefix (the text before the first ``':'``) to
        the list of ``"<db>:<id>"`` strings carrying that prefix, in input
        order.

    Raises:
        IndexError: If an entry contains no ``':'``.
    """
    refs = {}
    for ref in ref_list:
        sp = ref.split(':')
        # Only the first two fields are kept: anything after a second ':'
        # is dropped from the stored reference.
        entry = sp[0] + ":" + sp[1]
        refs.setdefault(sp[0], []).append(entry)
    return refs
# Prefix used to build the retriever script command lines below.
script_dir = args.scripts
# Holds the retrieved entries; this is the object written to the output file.
di = []
for c in j['computations']:
for r in c['results']:
# Group this result's cross-references by database prefix.
refs = sort_refs(r['informations'])
# For each database prefix present, run the matching retriever script through
# the shell with the references as arguments, parse its stdout as JSON and
# append the parsed value to di.
# str(...)[2:-1] cuts the first two and the last character off the string form
# of the output -- presumably the b'...' wrapper of a Python 3 bytes repr;
# TODO confirm the interpreter version.
if 'EC' in refs:
di.append(json.loads(str(subprocess.check_output(script_dir + "retrieve_enzyme.py " + " ".join(refs['EC']), shell=True).rstrip())[2:-1]))
if 'GO' in refs:
di.append(json.loads(str(subprocess.check_output(script_dir + "retrieve_gene_ontology.py " + " ".join(refs['GO']), shell=True).rstrip())[2:-1]))
if 'PFAM' in refs:
di.append(json.loads(str(subprocess.check_output(script_dir + "retrieve_pfam.py " + " ".join(refs['PFAM']), shell=True).rstrip())[2:-1]))
if 'SO' in refs:
di.append(json.loads(str(subprocess.check_output(script_dir + "retrieve_sequence_ontology.py " + " ".join(refs['SO']), shell=True).rstrip())[2:-1]))
if 'UniProtKB/Swiss-Prot' in refs:
di.append(json.loads(str(subprocess.check_output(script_dir + "retrieve_uniprot.py " + " ".join(refs['UniProtKB/Swiss-Prot']), shell=True).rstrip())[2:-1]))
# print (json.dumps(di))
# NOTE(review): the inner loop header is repeated here and the json.dump call
# appears twice below; this reads like the replacement side of the diff hunk
# shown next to the removed lines -- confirm against the committed file.
# Here di is reassigned to the return value of retrieve() for each result
# instead of being appended to.
for r in c['results']:
di = retrieve(r['informations'], basename)
# Serialize di as JSON into the requested output file.
output_filename = args.output
with open(output_filename, 'w') as o:
json.dump(di, o)
json.dump(di, o)
{
"id" : "gi|15963754|ref|NP_384107.1|",
"computations" : [
{
"tool" : {
"database" : "/vol/biodb/ghostx/uniprot",
"name" : "ghostx",
"version" : "1.3.6"
},
"results" : [
{
"score" : "42",
"alignment" : "|||||||||||||||||||||||||||",
"informations" : [
"UniProtKB/Swiss-Prot:Q5XI95",
"UniProtKB/Swiss-Prot:Q70UN9",
"GO:0008270",
"GO:0004024",
"GO:0004930",
"EC:1.1.1.1",
"EC:2.2.1.1",
"PFAM:PF00002",
"PFAM:PF08240",
"SO:0000022",
"SO:0000714"
]
}
]
}
]
}
{
"id" : "gi|15963754|ref|NP_384107.1|",
"computations" : [
{
"tool" : {
"database" : "/vol/biodb/ghostx/uniprot",
"name" : "ghostx",
"version" : "1.3.6"
},
"results" : [
{
"score" : "42",
"alignment" : "|||||||||||||||||||||||||||",
"informations" : [
"UniProtKB/Swiss-Prot:Q5XI95",
"UniProtKB/Swiss-Prot:Q70UN9",
"GO:0008270",
"GO:0004024",
"GO:0004930",
"EC:1.1.1.1",
"EC:2.2.1.1",
"PFAM:PF00002",
"PFAM:PF08240",
"SO:0000022",
"SO:0000714",
"GO:0005737",
"GO:0004022",
"GO:0008270",
"Pfam:PF08240",
"SO:0000380",
"SO:0000026",
"SO:0000020",
"UniProtKB/Swiss-Prot:P09370",
"UniProtKB/Swiss-Prot:P22246",
"UniProtKB/Swiss-Prot:P07161",
"UniProtKB/Swiss-Prot:P12854",
"UniProtKB/Swiss-Prot:P08843"
]
}
]
}
]
}
{"queryId": "000001", "computations": [{"results": {"alignment": ".................................................", "informations": ["UniProtKB/Swiss-Prot:P07327", "EC:1.1.1.1", "GO:0006915", "GO:0042981", "UniProtKB/Swiss-Prot:P28469", "UniProtKB/Swiss-Prot:Q5RBP7", "PFAM:PF00002", "SO:0000715", "SO:0000022", "SO:0000380", "UniProtKB/Swiss-Prot:P25405", "GO:0043065", "UniProtKB/Swiss-Prot:P00325"], "score": "2"}, "running_statistics": "Blablablablabla", "tool": {"name": "boomstick", "parameters": ["input", "42", "output/location"], "version": "2.4.1"}}]}
{"queryId": "000001", "computations": [{"results": {"alignment": ".................................................", "informations": ["UniProtKB/Swiss-Prot:P07327", "UniProtKB/Swiss-Prot:P28469", "UniProtKB/Swiss-Prot:Q5RBP7", "PFAM:PF00002", "UniProtKB/Swiss-Prot:P25405", "UniProtKB/Swiss-Prot:P00325"], "score": "2"}, "running_statistics": "Blablablablabla", "tool": {"name": "boomstick", "parameters": [ "input", "42", "output/location"], "version": "2.4.1"}}]}
{
"queryId": "000001",
"computations": [
{
"results": {
"alignment": ".................................................",
"informations": [
"UniProtKB/Swiss-Prot:P07327",
"EC:1.1.1.1",
"GO:0006915",
"GO:0042981",
"UniProtKB/Swiss-Prot:P28469",
"UniProtKB/Swiss-Prot:Q5RBP7",
"PFAM:PF00002",
"SO:0000715",
"SO:0000022",
"SO:0000380",
"UniProtKB/Swiss-Prot:P25405",
"GO:0043065",
"UniProtKB/Swiss-Prot:P00325"
],
"score": "2"
},
"running_statistics": "Blablablablabla",
"tool": {
"name": "boomstick",
"parameters": [
"input",
"42",
"output/location"
],
"version": "2.4.1"
}
}
]
}
{
"queryId": "000001",
"computations": [
{
"results": {
"alignment": ".................................................",
"informations": [
"UniProtKB/Swiss-Prot:P07327",
"UniProtKB/Swiss-Prot:P28469",
"UniProtKB/Swiss-Prot:Q5RBP7",
"PFAM:PF00002",
"UniProtKB/Swiss-Prot:P25405",
"UniProtKB/Swiss-Prot:P00325"
],
"score": "2"
},
"running_statistics": "Blablablablabla",
"tool": {
"name": "boomstick",
"parameters": [
"input",
"42",
"output/location"
],
"version": "2.4.1"
}
}
]
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment