Skip to content
Snippets Groups Projects
Commit 2f4bd01f authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Add pepstats tool by fabian schnecko

parent 8e077df6
No related branches found
No related tags found
No related merge requests found
# Module manifest for the Pepstats analysis
# The name of the module. It is needed for the list-analyses option, for custom
# configurations, and for custom profiles.
name: 'pepstats'
# Short description of the analysis.
info: 'Calculates statistics of protein properties'
# The configuration of the script for the analysis step.
analysis:
# script must take a --fasta parameter
script: 'run_pepstats.py'
# specify additional default configuration here
parameters:
# run script in a container
container:
docker: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
singularity: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
# The configuration of the script for the json conversion step.
converter:
# script must take a --result parameter, which is the result from the analysis step
script: 'convert_pepstats.py'
# specify additional default configuration here
parameters:
......@@ -8,3 +8,4 @@ modules:
evalue: 1e-5
hmmer_ecfgroups:
hmmer_ecfsubgroups:
pepstats:
......@@ -7,3 +7,4 @@ modules:
tmhmm:
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -6,3 +6,4 @@ modules:
blastp_swissprot:
hmmer_pfam_a:
tmhmm:
pepstats:
......@@ -8,3 +8,4 @@ modules:
targetp:
organism_group: 'non-plant'
tmhmm:
pepstats:
......@@ -9,3 +9,4 @@ modules:
organism_group: plant
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -9,3 +9,4 @@ modules:
organism_group: non-plant
ghostx_swissprot:
hmmer_pfam_a:
pepstats:
......@@ -6,3 +6,4 @@ modules:
signalp:
organism: 'euk'
tmhmm:
pepstats:
#!/usr/bin/env python3
import sys
import json
import argparse
import glob
def _store_record(documents, seq_id, computation, result):
    """Attach a finished per-sequence result to its document."""
    computation['results'].append(result)
    documents[seq_id]['computations'].append(computation)


def parse_pepstats(lines):
    """Parse the lines of a pepstats report into per-sequence documents.

    Args:
        lines: An iterable of text lines (for example an open file) holding
            one or more ``PEPSTATS of <id> ...`` records.

    Returns:
        A dict mapping each sequence id to
        ``{'id': <id>, 'computations': [...]}``. Every record contributes one
        computation that carries a single result dict.

    A record is stored once its property table ends. The table is considered
    finished at the following blank line, at the next ``PEPSTATS`` header, or
    at the end of the input, so a report without a trailing blank line no
    longer loses its last record.
    """
    documents = {}
    seq_id = None
    computation = None
    result = None
    in_residues = False    # inside the per-amino-acid table
    in_properties = False  # inside the physico-chemical class table

    # Iterate over the report line by line.
    for raw in lines:
        fields = raw.strip().split()

        if raw.startswith('PEPSTATS'):
            # A header arriving while the property table is still open means
            # the previous record was not closed by a blank line.
            if in_properties:
                _store_record(documents, seq_id, computation, result)
            in_residues = False
            in_properties = False
            seq_id = fields[2]
            if seq_id not in documents:
                documents[seq_id] = {'id': seq_id, 'computations': []}
            computation = {
                'tool': {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'},
                'results': [],
            }
            result = {}

        # Values that describe the whole sequence.
        elif raw.startswith('Molecular'):
            result['Molecular weight'] = float(fields[3])
            result['Residues'] = float(fields[6])
        elif raw.startswith('Average'):
            result['Average Residue Weight'] = float(fields[4])
            result['Charge'] = float(fields[7])
        elif raw.startswith('Isoelectric'):
            # The key spelling is kept as emitted so far; consumers of the
            # JSON may rely on it.
            result['Isolectric point'] = float(fields[3])
        elif raw.startswith('Improbability'):
            # The report states an improbability here; store its complement.
            result['Probability of expression in inclusion bodies'] = (
                1 - float(fields[7])
            )
        elif raw.startswith('Probability'):
            # The report states the probability directly in this variant.
            result['Probability of expression in inclusion bodies'] = (
                float(fields[7])
            )

        # Values for the individual amino acids.
        elif not in_residues and raw.startswith('Residue'):
            in_residues = True
        elif in_residues:
            if not fields:
                in_residues = False  # a blank line ends the table
            else:
                # Row layout after splitting: <letter> = <name> <n> <mole%> <dayhoff>
                result.setdefault('Amino acids', []).append({
                    'Residue': fields[2],
                    'Number': int(fields[3]),
                    'Mole%': float(fields[4]),
                    'DayhoffStat': float(fields[5]),
                })

        # Values for the amino acid groups.
        elif not in_properties and raw.startswith('Property'):
            in_properties = True
        elif in_properties:
            if not fields:
                in_properties = False  # a blank line ends the table and the record
                _store_record(documents, seq_id, computation, result)
            else:
                result.setdefault('Physico-chemical class', []).append({
                    'Property': fields[0],
                    'Residues': fields[1],
                    'Number': int(fields[2]),
                    'Mole%': float(fields[3]),
                })

    # The input ended while the property table was still open: keep the record.
    if in_properties:
        _store_record(documents, seq_id, computation, result)

    return documents


def main(argv=None):
    """Convert a pepstats result file into a JSON document file.

    Args:
        argv: Optional list of command-line arguments. ``None`` lets argparse
            read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    with open(args.result) as report:
        documents = parse_pepstats(report)

    with open(args.output, 'w') as out:
        json.dump(documents, out)


if __name__ == '__main__':
    main()
\ No newline at end of file
#!/usr/bin/env python3
import argparse
from os import system,environ
# Name of the executable this wrapper refers to.
pepstats_tool = 'pepstats'

# Command-line interface: an input fasta file and an output file, both required.
cli = argparse.ArgumentParser(
    description='Calculates statistics of protein properties',
)
cli.add_argument('--fasta', '-f', required=True, help='A fasta file')
cli.add_argument('--output', '-o', required=True, help='A output file')
args = cli.parse_args()

# NOTE(review): the visible code only prints the tool name and both paths to
# stdout; it does not invoke the tool. Confirm whether an execution step is
# missing from this view.
command_words = (pepstats_tool, args.fasta, args.output)
print(*command_words)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment