-
Lukas Jelonek authoredLukas Jelonek authored
convert_pepstats.py 3.70 KiB
#!/usr/bin/env python3
import sys
import json
import argparse
import glob
# Line prefixes that only make sense inside a record, i.e. after a
# 'PEPSTATS of <id> ...' header has been seen.
_RECORD_PREFIXES = (
    'Molecular',
    'Average',
    'Isoelectric',
    'Improbability',
    'Probability',
    'Residue',
    'Property',
)


def _new_computation():
    """Return an empty computation entry tagged with the Pepstats tool info."""
    return {
        'tool': {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'},
        'results': [],
    }


def _store_record(documents, seq_id, computation, result):
    """Attach *result* to *computation* and *computation* to its document."""
    computation['results'].append(result)
    documents[seq_id]['computations'].append(computation)


def parse_pepstats(lines):
    """Parse pepstats report lines into a dict of JSON-serialisable documents.

    Args:
        lines: Iterable of text lines (an open file works) holding one or
            more pepstats reports, each introduced by a line starting with
            'PEPSTATS' whose third whitespace-separated token is the
            sequence id.

    Returns:
        A dict mapping each sequence id to
        ``{"id": <id>, "computations": [<computation>, ...]}``. Every
        computation carries the tool info and a one-element ``results``
        list with the parsed values.

    Raises:
        ValueError: If a record line appears before any 'PEPSTATS' header.
        IndexError: If a recognised line has fewer columns than expected.

    A record is stored when its property table ends. The table may be ended
    by a blank line, by the next 'PEPSTATS' header, or by the end of the
    input, so the last record is kept even without a trailing blank line.
    """
    documents = {}
    seq_id = None
    computation = None
    result = None
    in_residues = False    # inside the per-amino-acid table
    in_properties = False  # inside the physico-chemical class table

    for raw in lines:
        fields = raw.split()

        if raw.startswith('PEPSTATS'):
            # A header directly after a property table (no blank line in
            # between) must still close the previous record.
            if in_properties:
                _store_record(documents, seq_id, computation, result)
            in_residues = False
            in_properties = False

            seq_id = fields[2]
            if seq_id not in documents:
                documents[seq_id] = {
                    "id": seq_id,
                    "computations": [],
                }
            computation = _new_computation()
            result = {}
            continue

        if result is None:
            # Nothing to attach values to yet: fail clearly on record data,
            # skip anything else that precedes the first header.
            if raw.startswith(_RECORD_PREFIXES):
                raise ValueError(
                    'pepstats data found before a PEPSTATS header: '
                    + raw.strip()
                )
            continue

        # Values describing the whole sequence.
        if raw.startswith('Molecular'):
            result['Molecular weight'] = float(fields[3])
            # Kept as float so the emitted JSON stays unchanged.
            result['Residues'] = float(fields[6])
        elif raw.startswith('Average'):
            result['Average Residue Weight'] = float(fields[4])
            result['Charge'] = float(fields[7])
        elif raw.startswith('Isoelectric'):
            # NOTE: the key is misspelled ('Isolectric'); it is kept as is
            # because consumers of the JSON may rely on it.
            result['Isolectric point'] = float(fields[3])
        elif raw.startswith('Improbability'):
            # The report gives the improbability; store its complement.
            result['Probability of expression in inclusion bodies'] = (
                1 - float(fields[7])
            )
        elif raw.startswith('Probability'):
            # NOTE(review): pepstats is believed to print 'Probability ...'
            # instead of 'Improbability ...' for some sequences - confirm
            # against the EMBOSS documentation.
            result['Probability of expression in inclusion bodies'] = (
                float(fields[7])
            )
        # Values per amino acid.
        elif not in_residues and raw.startswith('Residue'):
            in_residues = True
        elif in_residues:
            if not fields:
                in_residues = False
            else:
                # Row layout: '<letter> = <name> <number> <mole%> <dayhoff>'.
                result.setdefault('Amino acids', []).append({
                    'Residue': fields[2],
                    'Number': int(fields[3]),
                    'Mole%': float(fields[4]),
                    'DayhoffStat': float(fields[5]),
                })
        # Values per amino acid group.
        elif not in_properties and raw.startswith('Property'):
            in_properties = True
        elif in_properties:
            if not fields:
                # A blank line ends the property table and the record.
                in_properties = False
                _store_record(documents, seq_id, computation, result)
            else:
                result.setdefault('Physico-chemical class', []).append({
                    'Property': fields[0],
                    'Residues': fields[1],
                    'Number': int(fields[2]),
                    'Mole%': float(fields[3]),
                })

    # Input ended inside the property table (no trailing blank line).
    if in_properties:
        _store_record(documents, seq_id, computation, result)

    return documents


def main(argv=None):
    """Convert the pepstats file given by --result into JSON at --output.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    with open(args.result) as r:
        documents = parse_pepstats(r)
    with open(args.output, 'w') as o:
        json.dump(documents, o)


if __name__ == '__main__':
    main()