Commit 2f4bd01f authored by Lukas Jelonek
Browse files

Add pepstats tool by fabian schnecko

parent 8e077df6
# Module manifest for the Pepstats analysis
# The name of the module. It is needed for the list-analyses option, for custom
# configurations and custom profiles.
name: 'pepstats'
# Short description of the analysis.
info: 'Calculates statistics of protein properties'
# The configuration of the script for the analysis step.
analysis:
# script must take a --fasta parameter
script: 'run_pepstats.py'
# specify additional default configuration here
parameters:
# run script in a container
container:
docker: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
singularity: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
# The configuration of the script for the json conversion step.
converter:
# script must take a --result parameter, which is the result from the analysis step
script: 'convert_pepstats.py'
# specify additional default configuration here
parameters:
......@@ -8,3 +8,4 @@ modules:
evalue: 1e-5
hmmer_ecfgroups:
hmmer_ecfsubgroups:
pepstats:
......@@ -7,3 +7,4 @@ modules:
tmhmm:
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -6,3 +6,4 @@ modules:
blastp_swissprot:
hmmer_pfam_a:
tmhmm:
pepstats:
......@@ -8,3 +8,4 @@ modules:
targetp:
organism_group: 'non-plant'
tmhmm:
pepstats:
......@@ -9,3 +9,4 @@ modules:
organism_group: plant
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -9,3 +9,4 @@ modules:
organism_group: non-plant
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -6,3 +6,4 @@ modules:
signalp:
organism: 'euk'
tmhmm:
pepstats:
#!/usr/bin/env python3
import sys
import json
import argparse
import glob
# Tool metadata recorded with every computation in the JSON output.
PEPSTATS_TOOL = {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'}

# Key under which the inclusion-body probability is stored.
INCLUSION_BODY_KEY = 'Probability of expression in inclusion bodies'


def _store_result(documents, seq_id, result):
    """Append one finished per-sequence result as a computation of *seq_id*."""
    documents[seq_id]['computations'].append(
        {'tool': dict(PEPSTATS_TOOL), 'results': [result]}
    )


def parse_pepstats(lines):
    """Convert the lines of a pepstats report into result documents.

    Args:
        lines: An iterable of text lines (e.g. an open report file).

    Returns:
        A dict mapping each sequence id to
        ``{'id': <id>, 'computations': [...]}``. Every report section that
        starts with a ``PEPSTATS`` header contributes one computation holding
        a single result dict.

    Raises:
        IndexError, ValueError: If a recognised line has fewer columns than
            expected or a numeric column cannot be converted.

    A record is considered complete when its property table ends: at a blank
    line, at the next ``PEPSTATS`` header, or at the end of the input. The
    last two cases keep the final record from being lost when the report has
    no trailing blank line.
    """
    documents = {}
    seq_id = None
    result = None
    # Which table is currently being read: None, 'residue' or 'property'.
    section = None

    for raw in lines:
        tokens = raw.split()

        if raw.startswith('PEPSTATS'):
            # A new header also terminates a property table that is still open.
            if section == 'property':
                _store_result(documents, seq_id, result)
            section = None
            seq_id = tokens[2]
            documents.setdefault(seq_id, {'id': seq_id, 'computations': []})
            result = {}

        # Values describing the whole sequence.
        elif raw.startswith('Molecular'):
            result['Molecular weight'] = float(tokens[3])
            # Stored as float to keep the emitted JSON unchanged.
            result['Residues'] = float(tokens[6])
        elif raw.startswith('Average'):
            result['Average Residue Weight'] = float(tokens[4])
            result['Charge'] = float(tokens[7])
        elif raw.startswith('Isoelectric'):
            # The key spelling is part of the output format; keep it as is.
            result['Isolectric point'] = float(tokens[3])
        elif raw.startswith('Improbability'):
            # The report states the improbability; store the complement.
            result[INCLUSION_BODY_KEY] = 1 - float(tokens[7])
        elif raw.startswith('Probability'):
            # NOTE(review): pepstats is expected to print this variant
            # instead of the 'Improbability' line for some sequences;
            # the value is then stored as reported. Confirm against the
            # EMBOSS documentation.
            result[INCLUSION_BODY_KEY] = float(tokens[7])

        # Per-amino-acid table.
        elif section is None and raw.startswith('Residue'):
            section = 'residue'
        elif section == 'residue':
            if not tokens:
                section = None
            else:
                result.setdefault('Amino acids', []).append({
                    'Residue': tokens[2],
                    'Number': int(tokens[3]),
                    'Mole%': float(tokens[4]),
                    'DayhoffStat': float(tokens[5]),
                })

        # Amino-acid group (physico-chemical class) table.
        elif section is None and raw.startswith('Property'):
            section = 'property'
        elif section == 'property':
            if not tokens:
                section = None
                _store_result(documents, seq_id, result)
            else:
                result.setdefault('Physico-chemical class', []).append({
                    'Property': tokens[0],
                    'Residues': tokens[1],
                    'Number': int(tokens[2]),
                    'Mole%': float(tokens[3]),
                })

    # Input ended while the last property table was still open.
    if section == 'property':
        _store_result(documents, seq_id, result)

    return documents


def main(argv=None):
    """Read a pepstats report and write the converted JSON documents.

    Args:
        argv: Optional argument list; defaults to the process arguments.
    """
    parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    with open(args.result) as report:
        documents = parse_pepstats(report)

    with open(args.output, 'w') as out:
        json.dump(documents, out)


if __name__ == '__main__':
    main()
\ No newline at end of file
#!/usr/bin/env python3
import argparse
from os import system,environ
# Name of the EMBOSS executable; expected to be on PATH (e.g. inside the container).
pepstats_tool = 'pepstats'


def build_command(fasta, output):
    """Return the pepstats argument list for one input/output file pair.

    Args:
        fasta: Path of the protein fasta file to analyse.
        output: Path of the report file pepstats should write.

    Returns:
        The command as a list of strings, suitable for ``subprocess``.
    """
    # '-auto' turns off interactive prompting so a batch run cannot hang
    # waiting for input.
    return [pepstats_tool, '-sequence', fasta, '-outfile', output, '-auto']


def main(argv=None):
    """Parse the command line and run pepstats on the given fasta file.

    The original script only printed the command and never executed it, so
    the requested output file was never created. The command is still
    printed (as a log line) and is now executed as well.

    Args:
        argv: Optional argument list; defaults to the process arguments.

    Returns:
        The exit status of the pepstats process.
    """
    # Imported here because the file's import block does not provide it.
    import subprocess

    parser = argparse.ArgumentParser(description='Calculates statistics of protein properties')
    parser.add_argument('--fasta', '-f', required=True, help='A fasta file')
    parser.add_argument('--output', '-o', required=True, help='A output file')
    args = parser.parse_args(argv)

    command = build_command(args.fasta, args.output)
    print(*command)
    # An argument list (no shell) keeps unusual file names from being
    # interpreted by a shell.
    return subprocess.call(command)


if __name__ == '__main__':
    raise SystemExit(main())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment