Skip to content
Snippets Groups Projects
convert_pepstats.py 3.70 KiB
#!/usr/bin/env python3

import sys
import json
import argparse
import glob

def _new_computation():
    """Return a fresh, empty computation entry tagged with the Pepstats tool info."""
    return {
        'tool': {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'},
        'results': [],
    }


def _parse_residue_row(fields):
    """Convert one whitespace-split row of the per-residue table.

    A row such as ``A = Ala  10  10.000  1.163`` splits into
    ``['A', '=', 'Ala', '10', '10.000', '1.163']``.
    """
    return {
        'Residue': fields[2],
        'Number': int(fields[3]),
        'Mole%': float(fields[4]),
        'DayhoffStat': float(fields[5]),
    }


def _parse_property_row(fields):
    """Convert one whitespace-split row of the amino-acid group (property) table.

    A row such as ``Tiny  (A+C+G+S+T)  30  30.000`` splits into
    ``['Tiny', '(A+C+G+S+T)', '30', '30.000']``.
    """
    return {
        'Property': fields[0],
        'Residues': fields[1],
        'Number': int(fields[2]),
        'Mole%': float(fields[3]),
    }


def _store_record(documents, seq_id, computation, result):
    """Attach a finished result to its computation and the computation to its document."""
    computation['results'].append(result)
    documents[seq_id]['computations'].append(computation)


def parse_pepstats(lines):
    """Parse pepstats report lines into JSON-serialisable documents.

    Args:
        lines: Iterable of text lines (an open file works), with or without
            trailing newlines.

    Returns:
        A dict mapping each sequence id to
        ``{'id': <id>, 'computations': [<computation>, ...]}``. Every
        ``PEPSTATS`` record contributes one computation holding one result.
        A record is stored once its property table has ended, either by a
        blank line, by the next ``PEPSTATS`` header or by the end of input.

    Raises:
        IndexError, ValueError: If a recognised line has fewer fields than
            expected or a field is not numeric.
    """
    documents = {}
    seq_id = None
    computation = None
    result = None
    table = None  # which table is being read: None, 'residue' or 'property'

    for raw in lines:
        fields = raw.split()

        if raw.startswith('PEPSTATS'):
            # A header directly after a property table (no separating blank
            # line) must not drop the record that was being read.
            if table == 'property':
                _store_record(documents, seq_id, computation, result)
            table = None
            seq_id = fields[2]
            if seq_id not in documents:
                documents[seq_id] = {'id': seq_id, 'computations': []}
            # Always start with fresh containers, also for an id seen before;
            # otherwise the previous record's result would be overwritten and
            # appended a second time.
            computation = _new_computation()
            result = {}
            continue

        if result is None:
            # Nothing can be attributed to a sequence before the first header.
            continue

        # Values describing the whole sequence.
        if raw.startswith('Molecular'):
            result['Molecular weight'] = float(fields[3])
            result['Residues'] = float(fields[6])
        elif raw.startswith('Average'):
            result['Average Residue Weight'] = float(fields[4])
            result['Charge'] = float(fields[7])
        elif raw.startswith('Isoelectric'):
            # Key spelling ('Isolectric') is kept as-is: it is part of the
            # emitted JSON schema.
            result['Isolectric point'] = float(fields[3])
        elif raw.startswith('Improbability'):
            # The report gives the improbability; the complement is stored.
            # NOTE(review): only the 'Improbability ...' line is handled -
            # confirm whether pepstats can also emit a 'Probability ...' line.
            result['Probability of expression in inclusion bodies'] = 1 - float(fields[7])
        elif table is None:
            # Outside a table only the two table headers are of interest.
            if raw.startswith('Residue'):
                table = 'residue'
            elif raw.startswith('Property'):
                table = 'property'
        elif not fields:
            # A blank line ends the current table; the property table is the
            # last part of a record, so the record is complete at that point.
            if table == 'property':
                _store_record(documents, seq_id, computation, result)
            table = None
        elif table == 'residue':
            # Per-amino-acid statistics.
            result.setdefault('Amino acids', []).append(_parse_residue_row(fields))
        else:
            # Statistics per amino-acid group.
            result.setdefault('Physico-chemical class', []).append(_parse_property_row(fields))

    # Input that ends right after the last property row (no trailing blank
    # line) still yields its final record.
    if table == 'property':
        _store_record(documents, seq_id, computation, result)

    return documents


def main(argv=None):
    """Command-line entry point: read a pepstats report and write it as JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    with open(args.result) as report:
        documents = parse_pepstats(report)

    with open(args.output, 'w') as out:
        json.dump(documents, out)


if __name__ == '__main__':
    main()