Commit 3b7d874f authored by Lukas Jelonek
Browse files

Add script to resolve dbxrefs for psot results

parent 19c938e1
def dbxrefs_from(document):
    '''Find all dbxrefs in the document and return them as a set of ids.

    The document is walked as ``document['computations'][*]['results'][*]``;
    a result contributes its ``result['target']['dbxref']`` value when it has
    a ``target`` that contains a ``dbxref`` key.

    A document without ``computations`` and a computation without ``results``
    are treated as contributing no dbxrefs instead of raising ``KeyError``.

    Returns a ``set``, so every id appears once no matter how many results
    reference it.
    '''
    return {
        result['target']['dbxref']
        for computation in document.get('computations') or ()
        for result in computation.get('results') or ()
        if 'target' in result and 'dbxref' in result['target']
    }
def combine(document, informations):
    '''Add an ``informations`` entry to each result that references a dbxref.

    document     -- dict walked as ``document['computations'][*]['results'][*]``;
                    it is modified in place.
    informations -- iterable of dicts; each entry is matched through its
                    ``id`` key against ``result['target']['dbxref']``.

    For every result whose ``target`` contains a ``dbxref``, the first entry of
    ``informations`` with that id is stored under ``result['informations']``.
    Results whose dbxref has no matching entry are left unchanged (a bare
    ``next()`` lookup would leak ``StopIteration`` to the caller here).

    Returns ``None``.
    '''
    # Index the entries once so each result is a dict lookup rather than a
    # scan of the whole list. setdefault keeps the first entry for a repeated
    # id, which is what a first-match linear search would pick.
    by_id = {}
    for entry in informations:
        if 'id' in entry:
            by_id.setdefault(entry['id'], entry)

    for computation in document.get('computations') or ():
        for result in computation.get('results') or ():
            if 'target' in result and 'dbxref' in result['target']:
                ref = result['target']['dbxref']
                if ref in by_id:
                    result['informations'] = by_id[ref]
......@@ -3,35 +3,24 @@
import json
import argparse
import sys
from dbxref.retriever import retrieve
from dbxref.resolver import convert_to_dbxrefs
from psot import dbxref
# NOTE(review): this listing appears to be a flattened diff that interleaves
# the pre-commit and post-commit versions of the script (indentation and +/-
# markers are missing) - confirm which statements belong in the final file.

# Command-line interface: input JSON document, retriever location and output
# JSON document; all three options are declared as required.
parser = argparse.ArgumentParser(description='Resolve dbxrefs from json file')
parser.add_argument('--input', '-i', required=True, help='The json input document')
parser.add_argument('--retriever', '-s', required=True, help='Location of retriever')
parser.add_argument('--output', '-o', required=True, help='The json output document')
args = parser.parse_args()
# Strip 'scripts/' from the retriever location and put the result, as well as
# the result with 'dbxref' appended, on the import path so that the
# `retriever` module below can be imported.
basename = args.retriever.replace('scripts/', '')
sys.path.append(basename)
sys.path.append(basename + 'dbxref')
# NOTE(review): this rebinds `retrieve`, which the file's import section
# already imports from dbxref.retriever - presumably the older import; confirm.
from retriever import retrieve
# Read the complete input file and parse it as JSON.
json_file = open(args.input).read()
j = json.loads(json_file)
# Collect the dbxref of every result that has a target containing a dbxref.
refs = []
for c in j['computations']:
for r in c['results']:
if 'target'in r and 'dbxref' in r['target']:
refs.append(r['target']['dbxref'])
# Retrieve the entries for the collected dbxrefs, passed in sorted order.
entries = retrieve(sorted(refs), basename)
# Replace each dbxref value with a dict holding the original dbxref and the
# first retrieved entry whose 'id' equals it.
for c in j['computations']:
for r in c['results']:
if 'target'in r and 'dbxref' in r['target']:
r['target']['dbxref'] = {'dbxref': r['target']['dbxref'], 'informations': next(entry for entry in entries if entry['id'] == r['target']['dbxref'])}
#with open(args.input, 'w') as o:
# Write the updated document to the output path.
with open(args.output, 'w') as o:
json.dump(j, o)
# NOTE(review): from here on the load / resolve / write steps are repeated
# using the psot.dbxref helpers - presumably the post-commit implementation.
with open(args.input, 'r') as json_file:
j = json.load(json_file)
# extract all dbxrefs
dbxrefs = dbxref.dbxrefs_from(j)
# retrieve information for dbxrefs
entries = retrieve(convert_to_dbxrefs(sorted(dbxrefs)))
# add information field to results
dbxref.combine(j, entries)
# write updated json
with open(args.output, 'w') as o:
json.dump(j, o)
This diff is collapsed.
import unittest
import json
import pkg_resources
from psot import config, dbxref
class TestDbXrefs(unittest.TestCase):
    '''Tests for the dbxref helpers of psot (``combine`` and ``dbxrefs_from``).'''

    def test_combine(self):
        '''combine() attaches the matching information entry to the result.'''
        test_data = {
            'id': 'bla',
            'computations': [
                {
                    'results': [
                        {
                            'target': {
                                'dbxref': 'test:123',
                            },
                        },
                    ],
                },
            ],
        }
        test_information = [
            {
                'id': 'test:123',
                'text': 'some text',
            },
        ]

        dbxref.combine(test_data, test_information)

        informations = test_data['computations'][0]['results'][0]['informations']
        # assertIn reports the container's content on failure, unlike
        # assertTrue('text' in ...), which only reports "False is not true".
        self.assertIn('text', informations)
        self.assertEqual(informations['text'], 'some text')

    def test_find_all_dbxrefs(self):
        '''dbxrefs_from() returns every dbxref contained in the example file.'''
        # read test file
        test_file = pkg_resources.resource_filename('tests', 'data/dbxref_example.json')
        with open(test_file, 'r') as f:
            data = json.load(f)

        # extract dbxrefs
        dbxrefs = dbxref.dbxrefs_from(data)

        # test if all expected dbxrefs were found
        self.assertEqual(dbxrefs, {
            "PFAM:PF01048.19",
            "PFAM:PF10423.8",
            "UniProtKB/Swiss-Prot:A0AIU3",
            "UniProtKB/Swiss-Prot:A0KIZ1",
            "UniProtKB/Swiss-Prot:A0PUL8",
            "UniProtKB/Swiss-Prot:A1A7K5",
            "UniProtKB/Swiss-Prot:A1IGA8",
            "UniProtKB/Swiss-Prot:A1RMF2",
            "UniProtKB/Swiss-Prot:A3D1T1",
            "UniProtKB/Swiss-Prot:A3QBQ0",
            "UniProtKB/Swiss-Prot:A4IN93",
            "UniProtKB/Swiss-Prot:A4IR66",
            "UniProtKB/Swiss-Prot:A4SP53",
            "UniProtKB/Swiss-Prot:A4W6Q6",
            "UniProtKB/Swiss-Prot:A4Y4H9",
            "UniProtKB/Swiss-Prot:A5F5R2",
            "UniProtKB/Swiss-Prot:A5ITC6",
            "UniProtKB/Swiss-Prot:A5UCP4",
            "UniProtKB/Swiss-Prot:A5UIX8",
            "UniProtKB/Swiss-Prot:A6QHE1",
            "UniProtKB/Swiss-Prot:A6T4W3",
            "UniProtKB/Swiss-Prot:A6U271",
            "UniProtKB/Swiss-Prot:A6VPH1",
            "UniProtKB/Swiss-Prot:A6WKN2",
            "UniProtKB/Swiss-Prot:A7MGS5",
            "UniProtKB/Swiss-Prot:A7X306",
            "UniProtKB/Swiss-Prot:A7ZHQ1",
            "UniProtKB/Swiss-Prot:A7ZWA7",
            "UniProtKB/Swiss-Prot:A8ALC9",
            "UniProtKB/Swiss-Prot:A8Z4D8",
            "UniProtKB/Swiss-Prot:A9L5L1",
            "UniProtKB/Swiss-Prot:A9MPK2",
            "UniProtKB/Swiss-Prot:A9N0Q5",
            "UniProtKB/Swiss-Prot:B0K6Y4",
            "UniProtKB/Swiss-Prot:B0K889",
            "UniProtKB/Swiss-Prot:B0TIS5",
            "UniProtKB/Swiss-Prot:B0URX4",
            "UniProtKB/Swiss-Prot:B1IQI1",
            "UniProtKB/Swiss-Prot:B1LGW2",
            "UniProtKB/Swiss-Prot:B1XD29",
            "UniProtKB/Swiss-Prot:B1YJD6",
            "UniProtKB/Swiss-Prot:B2U303",
            "UniProtKB/Swiss-Prot:B4SUY9",
            "UniProtKB/Swiss-Prot:B4TK35",
            "UniProtKB/Swiss-Prot:B4TXR1",
            "UniProtKB/Swiss-Prot:B5BL87",
            "UniProtKB/Swiss-Prot:B5F8S1",
            "UniProtKB/Swiss-Prot:B5FAL1",
            "UniProtKB/Swiss-Prot:B5FJ06",
            "UniProtKB/Swiss-Prot:B5R3H1",
            "UniProtKB/Swiss-Prot:B5RHE5",
            "UniProtKB/Swiss-Prot:B5Y1L0",
            "UniProtKB/Swiss-Prot:B5Z0D9",
            "UniProtKB/Swiss-Prot:B6EKZ7",
            "UniProtKB/Swiss-Prot:B6HZD5",
            "UniProtKB/Swiss-Prot:B7GIU7",
            "UniProtKB/Swiss-Prot:B7GKU4",
            "UniProtKB/Swiss-Prot:B7LGM1",
            "UniProtKB/Swiss-Prot:B7LWC0",
            "UniProtKB/Swiss-Prot:B7M1A0",
            "UniProtKB/Swiss-Prot:B7MBE2",
            "UniProtKB/Swiss-Prot:B7MP21",
            "UniProtKB/Swiss-Prot:B7N828",
            "UniProtKB/Swiss-Prot:B7NIC2",
            "UniProtKB/Swiss-Prot:B7UIK6",
            "UniProtKB/Swiss-Prot:B7VJ21",
            "UniProtKB/Swiss-Prot:B8D7B4",
            "UniProtKB/Swiss-Prot:B8D909",
            "UniProtKB/Swiss-Prot:B8DE17",
            "UniProtKB/Swiss-Prot:B8EBS7",
            "UniProtKB/Swiss-Prot:B9DNJ2",
            "UniProtKB/Swiss-Prot:C0Q5S0",
            "UniProtKB/Swiss-Prot:C1KVE1",
            "UniProtKB/Swiss-Prot:C3LQF1",
            "UniProtKB/Swiss-Prot:C4L559",
            "UniProtKB/Swiss-Prot:C4ZRQ2",
            "UniProtKB/Swiss-Prot:C5D2F9",
            "UniProtKB/Swiss-Prot:C6BU87",
            "UniProtKB/Swiss-Prot:O08444",
            "UniProtKB/Swiss-Prot:O22216",
            "UniProtKB/Swiss-Prot:O51931",
            "UniProtKB/Swiss-Prot:P0A1F6",
            "UniProtKB/Swiss-Prot:P0A1F7",
            "UniProtKB/Swiss-Prot:P0AE12",
            "UniProtKB/Swiss-Prot:P0AE13",
            "UniProtKB/Swiss-Prot:P0AF12",
            "UniProtKB/Swiss-Prot:P0AF13",
            "UniProtKB/Swiss-Prot:P0AF14",
            "UniProtKB/Swiss-Prot:P0AF15",
            "UniProtKB/Swiss-Prot:P12758",
            "UniProtKB/Swiss-Prot:P43770",
            "UniProtKB/Swiss-Prot:P45113",
            "UniProtKB/Swiss-Prot:P50389",
            "UniProtKB/Swiss-Prot:P52711",
            "UniProtKB/Swiss-Prot:P53595",
            "UniProtKB/Swiss-Prot:P57306",
            "UniProtKB/Swiss-Prot:P59406",
            "UniProtKB/Swiss-Prot:P60216",
            "UniProtKB/Swiss-Prot:P60217",
            "UniProtKB/Swiss-Prot:P65500",
            "UniProtKB/Swiss-Prot:P75712",
            "UniProtKB/Swiss-Prot:P77835",
            "UniProtKB/Swiss-Prot:P9WJJ8",
            "UniProtKB/Swiss-Prot:P9WJJ9",
            "UniProtKB/Swiss-Prot:Q0I5K4",
            "UniProtKB/Swiss-Prot:Q0T847",
            "UniProtKB/Swiss-Prot:Q0TLH2",
            "UniProtKB/Swiss-Prot:Q12KE6",
            "UniProtKB/Swiss-Prot:Q1RG29",
            "UniProtKB/Swiss-Prot:Q2FGC5",
            "UniProtKB/Swiss-Prot:Q2FXX8",
            "UniProtKB/Swiss-Prot:Q2NVP7",
            "UniProtKB/Swiss-Prot:Q2YT29",
            "UniProtKB/Swiss-Prot:Q31DQ5",
            "UniProtKB/Swiss-Prot:Q325Y0",
            "UniProtKB/Swiss-Prot:Q32JU9",
            "UniProtKB/Swiss-Prot:Q3Z5J8",
            "UniProtKB/Swiss-Prot:Q47UY5",
            "UniProtKB/Swiss-Prot:Q4L6V0",
            "UniProtKB/Swiss-Prot:Q4QL83",
            "UniProtKB/Swiss-Prot:Q57T48",
            "UniProtKB/Swiss-Prot:Q5E2X3",
            "UniProtKB/Swiss-Prot:Q5HFG2",
            "UniProtKB/Swiss-Prot:Q5HNU8",
            "UniProtKB/Swiss-Prot:Q5KWV9",
            "UniProtKB/Swiss-Prot:Q5KZM1",
            "UniProtKB/Swiss-Prot:Q5PD46",
            "UniProtKB/Swiss-Prot:Q5WHL7",
            "UniProtKB/Swiss-Prot:Q65IW0",
            "UniProtKB/Swiss-Prot:Q65X70",
            "UniProtKB/Swiss-Prot:Q6AQW7",
            "UniProtKB/Swiss-Prot:Q6G8W9",
            "UniProtKB/Swiss-Prot:Q6GGA2",
            "UniProtKB/Swiss-Prot:Q6LUR4",
            "UniProtKB/Swiss-Prot:Q71ZH6",
            "UniProtKB/Swiss-Prot:Q7A0R5",
            "UniProtKB/Swiss-Prot:Q7A5B0",
            "UniProtKB/Swiss-Prot:Q7MNT0",
            "UniProtKB/Swiss-Prot:Q7VKK0",
            "UniProtKB/Swiss-Prot:Q87SE5",
            "UniProtKB/Swiss-Prot:Q89AQ7",
            "UniProtKB/Swiss-Prot:Q8CP08",
            "UniProtKB/Swiss-Prot:Q8DEM9",
            "UniProtKB/Swiss-Prot:Q8EHA7",
            "UniProtKB/Swiss-Prot:Q8ENY0",
            "UniProtKB/Swiss-Prot:Q8EPT8",
            "UniProtKB/Swiss-Prot:Q8S0G4",
            "UniProtKB/Swiss-Prot:Q8S2G5",
            "UniProtKB/Swiss-Prot:Q8Y729",
            "UniProtKB/Swiss-Prot:Q92BL9",
            "UniProtKB/Swiss-Prot:Q99TQ0",
            "UniProtKB/Swiss-Prot:Q9CP62",
            "UniProtKB/Swiss-Prot:Q9KDD4",
            "UniProtKB/Swiss-Prot:Q9KPI8",
            "UniProtKB/Swiss-Prot:Q9S785",
            "UniProtKB/Swiss-Prot:Q9UZ37",
        })
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment