Commit 95fa5968 authored by Marc
Browse files

run scripts now print cmdline they used to run and are executed in nextflow.py...

The run scripts now print the command line they previously executed instead of running it; nextflow.py executes the run scripts as subprocesses to capture that command line, and the analysis tools are now executed directly inside the Nextflow processes.
parent 7ea6c9a6
......@@ -11,7 +11,7 @@ info: 'blastp analysis against swissprot'
analysis:
script: 'run_blastp.py'
parameters:
database: '/home/ubuntu/db/uniprot_sprot.fasta'
database: '/home/ubuntu/db/blastp/uniprot_sprot.fasta'
evalue: 1e-10
use_accession:
container:
......
......@@ -11,7 +11,7 @@ info: 'ghostx analysis against swissprot'
analysis:
script: 'run_ghostx.py'
parameters:
database: '/home/ubuntu/db/uniprot_sprot.fasta'
database: '/home/ubuntu/db/ghostx/uniprot_sprot'
container:
# The name of the result to json converter script. Must take one parameter, the
......
......@@ -11,7 +11,7 @@ info: 'hmmscan analysis against PFAM-A'
analysis:
script: 'run_hmmer.py'
parameters:
database: '/home/ubuntu/db/Pfam-A.hmm'
database: '/home/ubuntu/db/hmmer/Pfam-A.hmm'
evalue: 1e-10
execution:
cluster:
......
......@@ -4,4 +4,6 @@ modules:
signalp:
organism: 'euk'
targetp:
organism_group: 'non-plant'
\ No newline at end of file
organism_group: 'non-plant'
ghostx_swissprot:
hmmer_pfam_a:
\ No newline at end of file
......@@ -2,6 +2,7 @@
import sys
import json
import argparse
import glob
parser = argparse.ArgumentParser(description='Convert hmmscan results to json documents')
parser.add_argument('--result', '-r', required=True, help='The run_hmmer result directory')
......@@ -9,10 +10,14 @@ parser.add_argument('--output', '-o', required=True, help='The converted results
parser.add_argument('--dbxref', '-d', required=True, help='The dbxref prefix that will be prepended to the accession or id')
args = parser.parse_args()
query_file = args.result + "/queries.json"
# Provide a list of all query sequence names for conversion process
queries = []
with open(query_file) as f:
queries = json.load(f)
path = "*_enum_headers.tsv"
for filename in glob.glob(path):
with open(filename) as f:
for line in f:
if line.startswith('>'):
queries.append(line.split()[0].strip().lstrip('>'))
filename = args.result + "/domtblout.tsv"
documents = {}
......
#!/usr/bin/env python3
import argparse
from os import system,environ
def find_tool_path():
from psot import config
return config.load_config()['tools'].get('blastp', 'blastp')
blastp_tool = ''
if 'INSIDE_CONTAINER' in environ:
blastp_tool = 'blastp'
else:
blastp_tool = find_tool_path()
blastp_tool = 'blastp'
parser = argparse.ArgumentParser(description='Identify homologues in the swissprot database')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
......@@ -32,4 +24,4 @@ format += ' pident qstart qend sstart send slen evalue bitscore'
if args.alignment:
format += ' btop'
system(blastp_tool + " -db " + args.database + " -outfmt '" + format + "' -out " + args.output + " -query " + args.fasta + " -evalue " + args.evalue)
print(blastp_tool + " -db " + args.database + " -outfmt '" + format + "' -out " + args.output + " -query " + args.fasta + " -evalue " + args.evalue)
#!/usr/bin/env python3
import argparse
import re
from os import system,makedirs,path,environ
import subprocess
import json
def find_tool_path():
from psot import config
cfg = config.load_config()
return cfg['tools'].get('ghostx', 'ghostx'), cfg['helpers_path'] + '/reduce_fasta_headers_to_enumeration.py'
ghostx_tool = ''
reduce_headers_tool =''
if 'INSIDE_CONTAINER' in environ:
ghostx_tool = 'ghostx'
reduce_headers_tool = 'reduce_fasta_headers_to_enumeration.py'
else:
ghostx_tool, reduce_headers_tool = find_tool_path()
ghostx_tool = 'ghostx'
parser = argparse.ArgumentParser(description='Identify homologues in the swissprot database')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
......@@ -26,26 +11,15 @@ parser.add_argument('--database', '-d', required=True, help='Database to search
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json, results.tsv and enum_headers.tsv.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
# Swap fasta headers for unique numbers to save ghostx from dealing with complex headers
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-e", args.output + '/enum_headers.tsv'])
print('mkdir -p ' + args.output)
# Approach:
# directory for output
# info.json -> Tool info
# results.tsv -> Tool result
toolconfig = {
'name': 'ghostx',
'database': args.database
}
# find version
output = subprocess.run([ghostx_tool], stdout=subprocess.PIPE)
text = output.stdout.decode('ascii')
result = re.search('version (.*)', text)
toolconfig['version'] = result.group(1)
with open(args.output + '/info.json', 'w') as f:
json.dump(toolconfig, f)
system(ghostx_tool + " aln -d " + args.database + " -o " + args.output + "/results.tsv -i " + args.fasta)
print(" VERSION=\$(ghostx | grep -Eo '[[:digit:]]\\\\.[[:digit:]]\\\\.[[:digit:]]')")
toolconfig = '{\\\\"name\\\\": \\\\"ghostx\\\\", \\\\"database\\\\":' \
+ '\\\\"' + args.database + '\\\\"' + ', \\\\"version\\\\":\\\\"\$VERSION\\\\" }'
print(' echo ' + toolconfig + ' > ' + args.output + '/info.json')
print(' ' + ghostx_tool + " aln -d " + args.database + " -o " + args.output + "/results.tsv -i " + args.fasta)
......@@ -4,16 +4,7 @@ import argparse
import json
from os import system,makedirs,environ
def find_tool_path():
from psot import config
return config.load_config()['tools'].get('hmmscan', 'hmmscan')
hmmscan_tool = ''
if 'INSIDE_CONTAINER' in environ:
hmmscan_tool = 'hmmscan'
else:
hmmscan_tool = find_tool_path()
hmmscan_tool = 'hmmscan'
parser = argparse.ArgumentParser(description='Search sequences against a profile database')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
......@@ -22,9 +13,9 @@ parser.add_argument('--output', '-o', required=True, help='The result directory'
parser.add_argument('--evalue', '-e', default='0.0001', help='Evalue cutoff')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
print('mkdir -p ' + args.output)
system(hmmscan_tool +
print(hmmscan_tool +
" -E " + args.evalue +
" -o " + args.output + "/hmmscan.out " +
" --tblout " + args.output + "/tblout.tsv " +
......@@ -32,14 +23,3 @@ system(hmmscan_tool +
" --pfamtblout " + args.output + "/pfamtblout.tsv " +
args.database + " " + args.fasta)
# Provide a list of all query sequence names for conversion process
queries = []
with open(args.fasta) as f:
for line in f:
if line.startswith('>'):
queries.append(line.split()[0].strip().lstrip('>'))
query_file = args.output + '/queries.json'
with open(query_file, 'w') as o:
json.dump(queries, o)
#!/usr/bin/env python3
import argparse
from os import system
import subprocess
parser = argparse.ArgumentParser(description='Script that returns the fasta as it is inserted. Can be used for modules that have no actual analysis.')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json and results.tsv.')
args = parser.parse_args()
system('cp ' + args.fasta + " " + args.output)
print('cp ' + args.fasta + " " + args.output)
......@@ -3,16 +3,7 @@
import argparse
from os import system,makedirs,environ
def find_tool_path():
from psot import config
return config.load_config()['tools'].get('signalp', 'signalp')
signalp_tool = ''
if 'INSIDE_CONTAINER' in environ:
signalp_tool = 'signalp'
else:
signalp_tool = find_tool_path()
signalp_tool = 'signalp'
parser = argparse.ArgumentParser(description='Find signal peptides in amino acid sequences')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
......@@ -21,8 +12,8 @@ parser.add_argument('--include_mature', '-m', action='store_true', help='Include
parser.add_argument('--output', required=True, help='The result directory. Will contain results.txt and optionally mature.fas.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
print('mkdir -p ' + args.output)
mature_param = " -m {}/mature.fas ".format(args.output) if args.include_mature else " "
results_file = args.output + '/results.txt'
system(signalp_tool + " -t " + args.organism + mature_param + args.fasta + " > " + results_file)
print(' ' + signalp_tool + " -t " + args.organism + mature_param + args.fasta + " > " + results_file)
......@@ -2,10 +2,6 @@
import argparse
from os import system,makedirs,path,environ
import subprocess
targetp_tool = ''
reduce_headers_tool =''
targetp_tool = 'targetp'
......@@ -17,7 +13,7 @@ parser.add_argument('--organism_group', choices=org_flags.keys(), required=True,
parser.add_argument('--output', required=True, help='The result directory. Will contain results.txt and enum_headers.tsv.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
print('mkdir -p ' + args.output)
results_file = args.output + '/results.txt'
system(targetp_tool + " " + org_flags[args.organism_group] + " " + args.fasta + " > " + results_file)
print(' ' + targetp_tool + " " + org_flags[args.organism_group] + " " + args.fasta + " > " + results_file)
......@@ -3,20 +3,11 @@
import argparse
from os import system,environ
def find_tool_path():
from psot import config
return config.load_config()['tools'].get('tmhmm', 'tmhmm')
tmhmm_tool = ''
if 'INSIDE_CONTAINER' in environ:
tmhmm_tool = 'tmhmm'
else:
tmhmm_tool = find_tool_path()
tmhmm_tool = 'tmhmm'
parser = argparse.ArgumentParser(description='Find transmembrane helices in amino acid sequences')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
parser.add_argument('--output', required=True, help='The output file')
args = parser.parse_args()
system("cat " + args.fasta + " | " + tmhmm_tool + " -short > " + args.output) #
print("cat " + args.fasta + " | " + tmhmm_tool + " -short > " + args.output) #
from string import Template
import os.path
import os
import subprocess
from copy import deepcopy
import collections
......@@ -42,7 +43,7 @@ process ${id} {
script:
"""
${analysis_script} --fasta $$fasta --output $${fasta}.${id}.results ${analysis_params}
${cmdline}
"""
}
''')
......@@ -292,6 +293,10 @@ def generate_nextflow_script(execution):
config = flatten(m)
config['output'] = execution['output']
config['helpers_path'] = execution['helpers_path']
command = Template("""${analysis_script} --fasta '$$fasta' --output '$${fasta}.${id}.results' ${analysis_params}""").substitute(config)
cmdline = subprocess.run(command, shell=True, stdout=subprocess.PIPE)
config['cmdline'] = cmdline.stdout.decode('utf-8')
fragments.append(analysis_template.substitute(config))
if execution['mode'] == 'live' and not execution['fetch_informations']:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment