Add pepstats tool by fabian schnecko

2f4bd01f · Lukas Jelonek · 8e077df6 · 2f4bd01f · 2f4bd01f · 2f4bd01f
Commit 2f4bd01f authored 5 years ago by Lukas Jelonek
--- a/modules/pepstats.yaml
+++ b/modules/pepstats.yaml
+# Module manifest for the Pepstats analysis
+
+# The name of the module. It is required by the list-analyses option and by
+# custom configurations and custom profiles.
+name: 'pepstats'
+
+# Short description of the analysis.
+info: 'Calculates statistics of protein properties'
+
+# The configuration of the script for the analysis step.
+analysis:
+    # script must take a --fasta parameter
+    script: 'run_pepstats.py'
+    # specify additional default configuration here
+    parameters:
+    # run script in a container
+    container:
+      docker: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
+      singularity: 'biocontainers/emboss:v6.6.0dfsg-7b1-deb_cv1'
+
+# The configuration of the script for the json conversion step.
+converter:
+    # script must take a --result parameter, which is the result from the analysis step
+    script: 'convert_pepstats.py'
+    # specify additional default configuration here
+    parameters:
--- a/profiles/bacteria-ecf.yaml
+++ b/profiles/bacteria-ecf.yaml
@@ -8,3 +8,4 @@ modules:
          evalue: 1e-5
  hmmer_ecfgroups:
  hmmer_ecfsubgroups:
+  pepstats:
--- a/profiles/bacteria-gram-.yaml
+++ b/profiles/bacteria-gram-.yaml
@@ -7,3 +7,4 @@ modules:
  tmhmm:
  ghostx_swissprot:
  hmmer_pfam_a:
+  pepstats:
--- a/profiles/common.yaml
+++ b/profiles/common.yaml
@@ -6,3 +6,4 @@ modules:
  blastp_swissprot:
  hmmer_pfam_a:
  tmhmm:
+  pepstats:
--- a/profiles/complete.yaml
+++ b/profiles/complete.yaml
@@ -8,3 +8,4 @@ modules:
  targetp:
    organism_group: 'non-plant'
  tmhmm:
+  pepstats:
--- a/profiles/eukaryote-plant.yaml
+++ b/profiles/eukaryote-plant.yaml
@@ -9,3 +9,4 @@ modules:
    organism_group: plant
  ghostx_swissprot:
  hmmer_pfam_a:
+  pepstats:
--- a/profiles/eukaryote.yaml
+++ b/profiles/eukaryote.yaml
@@ -9,3 +9,4 @@ modules:
    organism_group: non-plant
  ghostx_swissprot:
  hmmer_pfam_a:
+  pepstats:
--- a/profiles/fast.yaml
+++ b/profiles/fast.yaml
@@ -6,3 +6,4 @@ modules:
  signalp:
      organism: 'euk'
  tmhmm:
+  pepstats:
--- a/scripts/convert_pepstats.py
+++ b/scripts/convert_pepstats.py
+#!/usr/bin/env python3
+
+import sys
+import json
+import argparse
+import glob
+
+# Converts a pepstats report (one or more records, each introduced by a
+# "PEPSTATS of <id> ..." line) into a json object that maps every sequence id
+# to a document holding the parsed statistics.
+parser = argparse.ArgumentParser(description='Convert pepstats results to json documents')
+parser.add_argument('--result', '-r', required=True, help='The pepstats result file')
+parser.add_argument('--output', '-o', required=True, help='The converted results json file')
+args = parser.parse_args()
+
+# Parser states: which table of the report, if any, the current line is in.
+NO_TABLE, RESIDUE_TABLE, PROPERTY_TABLE = range(3)
+
+# Maps a sequence id to its json document.
+documents = {}
+
+
+def store_result(seq_id, result):
+    """Append one finished pepstats record to the document of its sequence.
+
+    Every record gets its own computation entry, so a sequence id that occurs
+    more than once in the report yields one computation per occurrence
+    instead of several records being merged into one shared result.
+    """
+    document = documents.setdefault(seq_id, {'id': seq_id, 'computations': []})
+    document['computations'].append({
+        'tool': {'name': 'Pepstats', 'version': 'EMBOSS:6.6.0.0'},
+        'results': [result],
+    })
+
+
+seq_id = None
+result = None  # record currently being parsed; None outside of a record
+table = NO_TABLE
+
+with open(args.result) as r:
+    # Iterate over the report line by line.
+    for line in r:
+        fields = line.split()
+
+        # Statistics of the individual amino acids; a blank line ends the table.
+        if table == RESIDUE_TABLE:
+            if not fields:
+                table = NO_TABLE
+            else:
+                # Row layout: <code> = <residue> <number> <mole%> <dayhoff>
+                result.setdefault('Amino acids', []).append({
+                    'Residue': fields[2],
+                    'Number': int(fields[3]),
+                    'Mole%': float(fields[4]),
+                    'DayhoffStat': float(fields[5]),
+                })
+            continue
+
+        # Statistics of the amino acid groups; a blank line ends the table and,
+        # because it is the last section that is parsed, also the record.
+        if table == PROPERTY_TABLE:
+            if not fields:
+                table = NO_TABLE
+                store_result(seq_id, result)
+                result = None
+            else:
+                # Row layout: <property> <residues> <number> <mole%>
+                result.setdefault('Physico-chemical class', []).append({
+                    'Property': fields[0],
+                    'Residues': fields[1],
+                    'Number': int(fields[2]),
+                    'Mole%': float(fields[3]),
+                })
+            continue
+
+        if line.startswith('PEPSTATS'):
+            # A new record starts. Keep a previous record that was not closed
+            # by the blank line after its property table.
+            if result is not None:
+                store_result(seq_id, result)
+            seq_id = fields[2]
+            result = {}
+            continue
+
+        if result is None:
+            # Not inside a record; there is nothing to attach values to.
+            continue
+
+        # Statistics of the whole sequence.
+        if line.startswith('Molecular'):
+            result['Molecular weight'] = float(fields[3])
+            # Stored as float, as before, to keep the json output unchanged.
+            result['Residues'] = float(fields[6])
+        elif line.startswith('Average'):
+            result['Average Residue Weight'] = float(fields[4])
+            result['Charge'] = float(fields[7])
+        elif line.startswith('Isoelectric'):
+            # The key spelling (sic) is kept; consumers of the json may rely on it.
+            result['Isolectric point'] = float(fields[3])
+        elif line.startswith('Improbability'):
+            # The report states the improbability; store the complement.
+            result['Probability of expression in inclusion bodies'] = 1 - float(fields[7])
+        elif line.startswith('Probability'):
+            # NOTE(review): pepstats appears to print "Probability of
+            # expression in inclusion bodies = x" instead of the
+            # "Improbability" form for some sequences - confirm against the
+            # EMBOSS documentation. The value is then stored as it is.
+            result['Probability of expression in inclusion bodies'] = float(fields[7])
+        elif line.startswith('Residue'):
+            table = RESIDUE_TABLE
+        elif line.startswith('Property'):
+            table = PROPERTY_TABLE
+
+# Keep the last record when the report does not end with a blank line.
+if result is not None:
+    store_result(seq_id, result)
+
+with open(args.output, 'w') as o:
+    json.dump(documents, o)
\ No newline at end of file
--- a/scripts/run_pepstats.py
+++ b/scripts/run_pepstats.py
+#!/usr/bin/env python3
+import argparse
+import subprocess
+from os import system,environ
+
+# Runs EMBOSS pepstats on a fasta file and writes the report to the given
+# output file.
+
+# Name of the pepstats executable; it is expected to be found on the PATH.
+pepstats_tool = 'pepstats'
+
+parser = argparse.ArgumentParser(description='Calculates statistics of protein properties')
+parser.add_argument('--fasta', '-f', required=True, help='A fasta file')
+parser.add_argument('--output', '-o', required=True, help='A output file')
+args = parser.parse_args()
+
+# Build the command as an argument list, not as a shell string, so that file
+# names containing spaces or shell metacharacters are passed through unchanged.
+# '-auto' turns off the interactive prompts of EMBOSS.
+command = [pepstats_tool, '-sequence', args.fasta, '-outfile', args.output, '-auto']
+
+# Log the command before it runs; flush so the line precedes the tool's output.
+print(' '.join(command), flush=True)
+
+# Run the tool and hand its exit status to the caller, so that a failed
+# analysis is not reported as a success.
+raise SystemExit(subprocess.call(command))