Commit 4dda3c47 authored by hmueller
Browse files

Include mature sequence in json output if available.

parent f72da30b
......@@ -3,13 +3,26 @@
import sys
import json
import argparse
from os import path
# Command-line interface: the SignalP result directory and the target JSON
# file are both mandatory options.
parser = argparse.ArgumentParser(
    description='Convert signalp results to json document',
)
parser.add_argument(
    '--result', '-r',
    required=True,
    help='The signalp result directory',
)
parser.add_argument(
    '--output', '-o',
    required=True,
    help='The converted results json file',
)
args = parser.parse_args()
# Check whether the result directory contains 'mature.fas' and, if so, read it
# into a dict mapping sequence id -> mature sequence.
mature_file = path.join(args.result, 'mature.fas')
include_mature = path.isfile(mature_file)
mature = {}
if include_mature:
    # Collect the sequence lines per record and join them once at the end
    # instead of growing a string with repeated '+='.
    chunks = {}
    with open(mature_file) as fasta:
        seq_id = None
        for line in fasta:
            if line.startswith(">"):
                # Header line: the id is the first whitespace-delimited token
                # without the leading '>'. A repeated id restarts its record.
                seq_id = line.split()[0][1:]
                chunks[seq_id] = []
            elif seq_id is not None:
                # Sequence line of the current record; lines that appear
                # before the first header are ignored.
                chunks[seq_id].append(line.strip())
    mature = {key: ''.join(parts) for key, parts in chunks.items()}
# SignalP results table located inside the result directory.
filename = args.result + '/results.txt'
# Output documents, keyed by the first column of each result line.
documents = {}
......@@ -21,15 +34,23 @@ with open(filename) as f:
split = line.split()
if not split[0] in documents:
documents[split[0]] = {"id": split[0], "computations": [{'tool': tool, 'results':[]}]}
results = documents[split[0]]['computations'][0]["results"]
results = documents[split[0]]['computations'][0]["results"]
if split[9] == "Y":
'signalpeptide': True,
'score': float(split[8]),
'start': 1,
'end': int(split[2])-1
if include_mature:
    # Attach the mature sequence to the result entry that was just appended.
    # Fall back to an empty string (and warn) when the mature file has no
    # record for this protein.
    mature_seq = ''
    try:
        mature_seq = mature[split[0]]
    except KeyError:
        # A missing dict key raises KeyError, not ValueError.
        print('SignalP Converter: No mature sequence received for protein: {}, although signal peptide present!'.format(split[0]))
    results[-1]['mature sequence'] = mature_seq
results.append({'signalpeptide': False})
