#!/usr/bin/env python3
"""Convert an EMBOSS pepstats report into a JSON file.

The report is read line by line. Every ``PEPSTATS`` header line starts a new
record; the whole-sequence values, the per-amino-acid table and the
physico-chemical class table that follow are collected into one result.
The output is a JSON object keyed by sequence id, each value holding the id
and a list of computations (tool metadata plus results).
"""
import sys
import json
import argparse
import glob

# Tool metadata attached to every computation entry.
_TOOL = {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'}


def _parse_residue_row(fields):
    """Build one 'Amino acids' entry from a whitespace-split residue row."""
    return {
        'Residue': fields[2],
        'Number': int(fields[3]),
        'Mole%': float(fields[4]),
        'DayhoffStat': float(fields[5]),
    }


def _parse_property_row(fields):
    """Build one 'Physico-chemical class' entry from a split property row."""
    return {
        'Property': fields[0],
        'Residues': fields[1],
        'Number': int(fields[2]),
        'Mole%': float(fields[3]),
    }


def _store(documents, seq_id, computation, result):
    """Attach a finished result to its computation and register it."""
    computation['results'].append(result)
    documents[seq_id]['computations'].append(computation)


def parse_pepstats(lines):
    """Parse pepstats report lines into the documents dictionary.

    Args:
        lines: Iterable of text lines (for example an open report file).

    Returns:
        A dict mapping each sequence id to
        ``{"id": ..., "computations": [...]}``.

    A record is stored once its property table ends, whether that happens
    through a blank line, the next ``PEPSTATS`` header, or the end of input.
    """
    documents = {}
    seq_id = None
    computation = None
    result = None
    # Table currently being read: None, 'residue' or 'property'.
    section = None

    for raw in lines:
        if raw.startswith('PEPSTATS'):
            # A header directly after a property table (no blank line in
            # between) still has to close the previous record.
            if section == 'property':
                _store(documents, seq_id, computation, result)
            seq_id = raw.strip().split()[2]
            documents.setdefault(seq_id, {"id": seq_id, "computations": []})
            computation = {'tool': dict(_TOOL), 'results': []}
            result = {}
            section = None
            continue

        if result is None:
            # Nothing before the first header belongs to a record.
            continue

        fields = raw.strip().split()

        # Values describing the whole sequence.
        if raw.startswith('Molecular'):
            result['Molecular weight'] = float(fields[3])
            result['Residues'] = float(fields[6])
        elif raw.startswith('Average'):
            result['Average Residue Weight'] = float(fields[4])
            result['Charge'] = float(fields[7])
        elif raw.startswith('Isoelectric'):
            # Key spelling is kept as-is; it is part of the emitted JSON.
            result['Isolectric point'] = float(fields[3])
        elif raw.startswith('Improbability'):
            # NOTE(review): only the 'Improbability' wording is recognised;
            # confirm whether pepstats can emit a differently worded line.
            result['Probability of expression in inclusion bodies'] = (
                1 - float(fields[7])
            )
        # Per-amino-acid table: starts at its header, ends at a blank line.
        elif section is None and raw.startswith('Residue'):
            section = 'residue'
        elif section == 'residue':
            if not fields:
                section = None
            else:
                result.setdefault('Amino acids', []).append(
                    _parse_residue_row(fields)
                )
        # Amino-acid group table: a blank line ends it and closes the record.
        elif section is None and raw.startswith('Property'):
            section = 'property'
        elif section == 'property':
            if not fields:
                section = None
                _store(documents, seq_id, computation, result)
            else:
                result.setdefault('Physico-chemical class', []).append(
                    _parse_property_row(fields)
                )

    # The report may end without a blank line after the last property table.
    if section == 'property':
        _store(documents, seq_id, computation, result)

    return documents


def main(argv=None):
    """Parse command-line arguments, convert the report and write the JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    with open(args.result) as report:
        documents = parse_pepstats(report)

    with open(args.output, 'w') as out:
        json.dump(documents, out)


if __name__ == '__main__':
    main()