Commit 63463904 authored by Raphael Müller's avatar Raphael Müller
Browse files

integrated nanopore

parent b69d95d0
......@@ -9,29 +9,70 @@ DATA = config["datadir"]
ASSEMBLY = config["assembly"]
WD = config["workdir"]+"/"
include: "snakemake_workflows/single_only.smk"
include: "snakemake_workflows/single_multi.smk"
include: "snakemake_workflows/paired_only.smk"
include: "snakemake_workflows/nanopore.smk"
include: "snakemake_workflows/nanopore_multi.smk"
include: "snakemake_workflows/pacbio_multi.smk"
include: "snakemake_workflows/pacbio_nanopore.smk"
include: "snakemake_workflows/pacbio_only.smk"
include: "snakemake_workflows/paired_multi.smk"
include: "snakemake_workflows/single_paired.smk"
include: "snakemake_workflows/paired_nanopore.smk"
include: "snakemake_workflows/paired_only.smk"
include: "snakemake_workflows/paired_pacbio.smk"
include: "snakemake_workflows/paired_pacbio_nanopore.smk"
include: "snakemake_workflows/single_multi.smk"
include: "snakemake_workflows/single_nanopore.smk"
include: "snakemake_workflows/single_only.smk"
include: "snakemake_workflows/single_pacbio.smk"
include: "snakemake_workflows/single_pacbio_nanopore.smk"
include: "snakemake_workflows/single_paired.smk"
include: "snakemake_workflows/single_paired_nanopore.smk"
include: "snakemake_workflows/single_paired_pacbio.smk"
include: "snakemake_workflows/single_paired_pacbio_nanopore.smk"
rule all:
input:
rules.assembly_nanopore_multi_perl.output,
rules.assembly_nanopore_multi_nextflow.output,
rules.assembly_nanopore_multi_nextflow_params.output,
rules.assembly_nanopore_perl.output,
rules.assembly_nanopore_nextflow.output,
rules.assembly_pacbio_multi_perl.output,
rules.assembly_pacbio_multi_nextflow.output,
rules.assembly_pacbio_multi_nextflow_params.output,
rules.assembly_pacbio_nanopore_perl.output,
rules.assembly_pacbio_nanopore_nextflow.output,
rules.assembly_pacbio_perl.output,
rules.assembly_pacbio_nextflow.output,
rules.assembly_paired_multi_perl.output,
rules.assembly_paired_multi_nextflow.output,
rules.assembly_paired_multi_nextflow_params.output,
rules.assembly_paired_pacbio_nanopore_perl.output,
rules.assembly_paired_pacbio_nanopore_nextflow.output,
rules.assembly_paired_perl.output,
rules.assembly_paired_nextflow.output,
rules.assembly_single_paired_pacbio_nanopore_perl.output,
rules.assembly_single_paired_pacbio_nanopore_nextflow.output,
rules.assembly_paired_pacbio_perl.output,
rules.assembly_paired_pacbio_nextflow.output,
rules.assembly_single_multi_perl.output,
rules.assembly_single_multi_nextflow.output,
rules.assembly_single_multi_nextflow_params.output,
rules.assembly_single_nanopore_perl.output,
rules.assembly_single_nanopore_nextflow.output,
rules.assembly_single_perl.output,
rules.assembly_single_nextflow.output,
rules.assembly_single_paired_perl.output,
rules.assembly_single_paired_nextflow.output,
rules.assembly_single_pacbio_nanopore_perl.output,
rules.assembly_single_pacbio_nanopore_nextflow.output,
rules.assembly_single_pacbio_perl.output,
rules.assembly_single_pacbio_nextflow.output,
rules.assembly_paired_multi_perl.output,
rules.assembly_paired_multi_nextflow.output,
rules.assembly_paired_perl.output,
rules.assembly_paired_nextflow.output,
rules.assembly_single_paired_nanopore_perl.output,
rules.assembly_single_paired_nanopore_nextflow.output,
rules.assembly_single_paired_pacbio_nanopore_perl.output,
rules.assembly_single_paired_pacbio_nanopore_nextflow.output,
rules.assembly_single_paired_pacbio_perl.output,
rules.assembly_single_paired_pacbio_nextflow.output,
rules.assembly_single_paired_perl.output,
rules.assembly_single_paired_nextflow.output,
#test_cases = [os.path.join(config["workdir"],key,method,value["output_file"]+value["extension"]) for key,value in config["cases"].items() for method in ["original", "nextflow"]]
......
---
benchmark_repeats: 1
workdir: "test"
datadir: "dataset1/"
original_perl_script: "perl original_wf/backmap/backmap.pl"
......
name: backmap
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- snakemake
- mamba
- nextflow
- perl
- perl-app-cpanminus
- samtools
- bedtools
- bwa
- minimap2
- qualimap
- multiqc
- r-base
- r-ggplot2
- r-dplyr
- r-extrafont
This diff is collapsed.
executor {
$local {
cpus = 12
cpus = 16
}
}
process {
executor = 'local'
cpus = 1
cpus = 4
}
......@@ -615,7 +615,7 @@ if($assembly_path ne ""){
my $single_bam = join(" ",@paired_bam,@unpaired_bam);
$single_bam =~ s/^\s+//;
$single_bam =~ s/\s+$//;
$cmd = "ln -s $single_bam $out_dir/$prefix.bam";
$cmd = "ln -fs $single_bam $out_dir/$prefix.bam";
exe_cmd($cmd,$verbose,$dry);
push(@merged_bam_file, "$out_dir/$prefix.bam");
}
......@@ -629,7 +629,7 @@ if($assembly_path ne ""){
if(scalar(@pb_bam) > 0){
if(scalar(@pb_bam) == 1){
my $single_bam = $pb_bam[0];
$cmd = "ln -s $single_bam $out_dir/$prefix.pb.bam";
$cmd = "ln -fs $single_bam $out_dir/$prefix.pb.bam";
exe_cmd($cmd,$verbose,$dry);
push(@merged_bam_file, "$out_dir/$prefix.pb.bam");
}
......@@ -643,7 +643,7 @@ if($assembly_path ne ""){
if(scalar(@ont_bam) > 0){
if(scalar(@ont_bam) == 1){
my $single_bam = $ont_bam[0];
$cmd = "ln -s $single_bam $out_dir/$prefix.ont.bam";
$cmd = "ln -fs $single_bam $out_dir/$prefix.ont.bam";
exe_cmd($cmd,$verbose,$dry);
push(@merged_bam_file, "$out_dir/$prefix.ont.bam");
}
......@@ -784,9 +784,11 @@ if($create_histo_switch == 1){
open(RALL,'>',"$rscript") or die "ERROR\tCould not open file $rscript\n";
print RALL "xmax <- 0\n";
for(my $i = 0; $i < scalar(@techs); $i++){
if(exists($cov_files{$techs[$i]})){
print RALL "$techs[$i]=read.table(\"$cov_files{$techs[$i]}\")\n";
print RALL "xmax <- max(xmax, $techs[$i]\[,1])\n";
}
}
my $pdf = $rscript;
......@@ -795,12 +797,13 @@ if($create_histo_switch == 1){
my @legend = ();
my @lty = ();
my @col = ();
print RALL "plot(NULL,log=\"x\",type=\"l\",xlab=\"Coverage\",ylab=\"Count\",main=\"$assembly\",ylim=c(0,$global_ymax[0]), xlim=c(1,xmax))\n";
for(my $i = 0; $i < scalar(@techs); $i++){
if(exists $cov_files{$techs[$i]}){
push(@legend,"\"$techs[$i] N(0)=$n0_all{$techs[$i]}\"");
push(@lty,"1");
if($i == 0 and exists($cov_files{$techs[$i]})){
print RALL "plot($techs[$i]\[,1],$techs[$i]\[,2],log=\"x\",type=\"l\",xlab=\"Coverage\",ylab=\"Count\",main=\"$assembly\",ylim=c(0,$global_ymax[0]))\n";
print RALL "lines($techs[$i]\[,1],$techs[$i]\[,2],type=\"l\",col=\"black\")\n";
push(@col,"\"black\"");
}
if($i == 1 and exists($cov_files{$techs[$i]})){
......
nanopore:
- data_prep/nanopore.sim.100.fastq
- data_prep/nanopore.sim.101.fastq
- data_prep/nanopore.sim.102.fastq
pacbio:
- data_prep/pacbio.10000.1.fastq
- data_prep/pacbio.10000.5.fastq
- data_prep/pacbio.10000.9.fastq
paired-end:
- data_prep/paired_1.1000.1.fastq,data_prep/paired_2.1000.1.fastq
- data_prep/paired_1.1000.5.fastq,data_prep/paired_2.1000.5.fastq
- data_prep/paired_1.1000.9.fastq,data_prep/paired_2.1000.9.fastq
single-end:
- data_prep/unpaired.100.1.fastq
- data_prep/unpaired.100.5.fastq
- data_prep/unpaired.100.9.fastq
......@@ -4,3 +4,6 @@ conda-prefix: "no_backup/conda/"
conda-frontend: "mamba"
keep-going: True
cores: 1
restart-times: 0
max-jobs-per-second: 1
shadow-prefix: "/tmp/rmueller/"
# Settings shared by the two single-Nanopore rules that follow.
NAME = "na"
OF = WD + "assembly_nanopore/perl/"


def perl2nf(s):
    """Return `s` with every "/perl/" path segment replaced by "/nextflow/"."""
    return s.replace(r"/perl/", r"/nextflow/")


# Paths that the rules must declare with directory() instead of as plain files.
dir_outputs = list(expand(OF + NAME + "{t}.sort_stats", t=[".ont"]))
# Runs the program configured as "original_perl_script" on the assembly with a
# single Nanopore read file (passed via -ont).
rule assembly_nanopore_perl:
input:
assembly = ASSEMBLY,
nanopore = "data_prep/nanopore.sim.100.fastq",
output:
# Every entry of dir_outputs is declared as a directory output.
dirs = [directory(x) for x in dir_outputs],
# File outputs: OF + NAME + ".ont" combined with each (nested) multiext()
# suffix, plus the "_minimap_ont1.err" file.
nanopore = multiext(OF + NAME + ".ont","1.bam",".bam", *multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats", ".stats.err"), *multiext(".sort_stats_bamqc.","err","log")) + [OF+NAME+"_minimap_ont1.err"],
params:
prefix=NAME,
outdir=OF,
prgm=config["original_perl_script"]
threads: 16
# The benchmark is repeated config["benchmark_repeats"] times.
benchmark: repeat("benchmarks/perl/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Both log files are placed inside the output directory OF.
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
# The output directory is removed and recreated before the program runs;
# its stdout and stderr are redirected to the two log files.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} -ont {input.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
# Runs the program configured as "nextflow_script" on the same assembly and
# Nanopore input as assembly_nanopore_perl.
rule assembly_nanopore_nextflow:
input:
assembly = rules.assembly_nanopore_perl.input.assembly,
nanopore = rules.assembly_nanopore_perl.input.nanopore,
output:
# File outputs of the perl rule with "/perl/" replaced by "/nextflow/";
# entries of dir_outputs are skipped here and re-declared with directory() below.
[perl2nf(x) for x in rules.assembly_nanopore_perl.output if x not in dir_outputs],
[directory(perl2nf(x)) for x in dir_outputs],
params:
prefix=rules.assembly_nanopore_perl.params.prefix,
outdir=perl2nf(rules.assembly_nanopore_perl.params.outdir),
prgm=config["nextflow_script"]
# NOTE(review): {threads} is not referenced by the shell command below.
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Same log paths as the perl rule, mapped through perl2nf.
stdout = perl2nf(rules.assembly_nanopore_perl.log.stdout),
stderr = perl2nf(rules.assembly_nanopore_perl.log.stderr),
shell:
# The output directory is removed and recreated before the program runs.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --nanopore {input.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
# Settings shared by the three multi-file Nanopore rules that follow.
NAME = "nanana"
OF = WD + "assembly_nanopore_multi/perl/"


def perl2nf(s):
    """Return `s` with every "/perl/" path segment replaced by "/nextflow/"."""
    return s.replace(r"/perl/", r"/nextflow/")


def perl2nf_params(s):
    """Return `s` with every "/perl/" path segment replaced by "/nextflow_params/"."""
    return s.replace(r"/perl/", r"/nextflow_params/")


# Paths that the rules must declare with directory() instead of as plain files.
dir_outputs = list(expand(OF + NAME + "{t}.sort_stats", t=[".ont"]))
# Runs the program configured as "original_perl_script" on the assembly with
# three Nanopore read files.
rule assembly_nanopore_multi_perl:
input:
assembly = ASSEMBLY,
nanopore = [
"data_prep/nanopore.sim.100.fastq",
"data_prep/nanopore.sim.101.fastq",
"data_prep/nanopore.sim.102.fastq",
]
output:
# Every entry of dir_outputs is declared as a directory output.
dirs = [directory(x) for x in dir_outputs],
# File outputs: OF + NAME + ".ont" combined with each (nested) multiext()
# suffix, plus the three "_minimap_ont<N>.err" files.
nanopore = multiext(OF + NAME + ".ont","1.bam","2.bam","3.bam",".bam", *multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats", ".stats.err"), *multiext(".sort_stats_bamqc.","err","log")) + multiext(OF+NAME+"_minimap_ont","1.err","2.err","3.err"),
params:
prefix=NAME,
outdir=OF,
prgm=config["original_perl_script"],
# One "-ont <file>" argument per Nanopore input file.
nanopore = lambda wildcards, input: ["-ont "+ont for ont in input.nanopore]
threads: 16
# The benchmark is repeated config["benchmark_repeats"] times.
benchmark: repeat("benchmarks/perl/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Both log files are placed inside the output directory OF.
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
# The output directory is removed and recreated before the program runs.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} {params.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
# Runs the program configured as "nextflow_script" on the same assembly and
# Nanopore inputs as assembly_nanopore_multi_perl.
rule assembly_nanopore_multi_nextflow:
input:
assembly = rules.assembly_nanopore_multi_perl.input.assembly,
nanopore = rules.assembly_nanopore_multi_perl.input.nanopore,
output:
# File outputs of the perl rule with "/perl/" replaced by "/nextflow/";
# entries of dir_outputs are skipped here and re-declared with directory() below.
[perl2nf(x) for x in rules.assembly_nanopore_multi_perl.output if x not in dir_outputs],
[directory(perl2nf(x)) for x in dir_outputs],
params:
prefix=rules.assembly_nanopore_multi_perl.params.prefix,
outdir=perl2nf(rules.assembly_nanopore_multi_perl.params.outdir),
# All Nanopore inputs joined with commas into a single --nanopore value.
nanopore=lambda wildcards, input: ",".join(input.nanopore),
prgm=config["nextflow_script"]
# NOTE(review): {threads} is not referenced by the shell command below.
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Same log paths as the perl rule, mapped through perl2nf.
stdout = perl2nf(rules.assembly_nanopore_multi_perl.log.stdout),
stderr = perl2nf(rules.assembly_nanopore_multi_perl.log.stderr),
shell:
# The output directory is removed and recreated before the program runs.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --nanopore {params.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
# Same run as assembly_nanopore_multi_nextflow, but the Nanopore inputs are
# written to a YAML file and handed to the program via -params-file.
rule assembly_nanopore_multi_nextflow_params:
input:
assembly = rules.assembly_nanopore_multi_perl.input.assembly,
nanopore = rules.assembly_nanopore_multi_perl.input.nanopore,
output:
# File outputs of the perl rule with "/perl/" replaced by "/nextflow_params/";
# entries of dir_outputs are re-declared with directory().
all = [perl2nf_params(x) for x in rules.assembly_nanopore_multi_perl.output if x not in dir_outputs],
dirs = [directory(perl2nf_params(x)) for x in dir_outputs],
# The parameter file is generated by the shell command below.
params_file = "params_files/nanopore_multi.yaml"
params:
prefix=rules.assembly_nanopore_multi_perl.params.prefix,
outdir=perl2nf_params(rules.assembly_nanopore_multi_perl.params.outdir),
# One " - <path>" entry per input, joined by a literal backslash-n that
# `echo -e` in the shell command turns into line breaks.
nanopore=lambda wildcards, input: "\\n".join([ " - " + x for x in input.nanopore]),
prgm=config["nextflow_script"]
# NOTE(review): {threads} is not referenced by the shell command below.
threads: 16
benchmark: repeat("benchmarks/nextflow_params/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Same log paths as the perl rule, mapped through perl2nf_params.
stdout = perl2nf_params(rules.assembly_nanopore_multi_perl.log.stdout),
stderr = perl2nf_params(rules.assembly_nanopore_multi_perl.log.stderr),
shell:
# Recreates the output directory, writes the "nanopore:" list to the
# parameter file, then runs the program with that file.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
echo "nanopore:" > {output.params_file}
echo -e "{params.nanopore}" >> {output.params_file}
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} -params-file {output.params_file} 1> {log.stdout} 2> {log.stderr}
"""
OF = WD+"assembly_single_only/perl/"
OF = WD+"assembly_pacbio_multi/perl/"
perl2nf = lambda s: s.replace(r"/perl/",r"/nextflow/")
NAME = "se"
perl2nf_params = lambda s: s.replace(r"/perl/",r"/nextflow_params/")
NAME = "papapa"
rule assembly_single_perl:
rule assembly_pacbio_multi_perl:
input:
assembly = ASSEMBLY,
unpaired = "data_prep/unpaired.100.1.fastq"
pacbio = [
"data_prep/pacbio.10000.1.fastq",
"data_prep/pacbio.10000.5.fastq",
"data_prep/pacbio.10000.9.fastq"
],
output:
multiext(OF,
NAME+".bam",
*multiext(NAME+".sort.bam","",*multiext(".cov-hist", "",".err", ".log", ".pdf", ".plot.r")),
*multiext(NAME+".sort_stats_bamqc.","err","log"),
NAME+".unpaired1.bam",
*multiext(NAME+"_bwa_index.","err","log"),
NAME+"_bwa_mem_unpaired1.err",
) + multiext(OF+NAME+".sort_stats/", "genome_results.txt","qualimapReport.html")
sort_stats_pb = multiext(OF+NAME+".pb.sort_stats/", "genome_results.txt", "qualimapReport.html"),
pacbio = multiext(OF+NAME+".pb","1.bam","2.bam","3.bam",".bam",*multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats",".stats.err"),*multiext(".sort_stats_bamqc.","err","log")) + multiext(OF+NAME+"_minimap_pb","1.err","2.err","3.err"),
params:
prefix=NAME,
outdir=OF,
pacbio = lambda wildcards, input: ["-pb "+pb for pb in input.pacbio],
prgm=config["original_perl_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
stdout = "{folder}/{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}/{name}.stderr.log".format(folder=OF, name=NAME)
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
"""
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} -u {input.unpaired} 1> {log.stdout} 2> {log.stderr}
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} {params.pacbio} 1> {log.stdout} 2> {log.stderr}
"""
rule assembly_single_nextflow:
# Runs the program configured as "nextflow_script" with --pacbio on the
# comma-joined PacBio inputs.
# NOTE(review): this span contains paired lines that refer to
# assembly_single_perl and to assembly_pacbio_multi_perl; presumably these are
# the removed and added sides of a diff. Confirm which side is current.
rule assembly_pacbio_multi_nextflow:
input:
assembly = rules.assembly_single_perl.input.assembly,
unpaired = rules.assembly_single_perl.input.unpaired,
assembly = rules.assembly_pacbio_multi_perl.input.assembly,
pacbio = rules.assembly_pacbio_multi_perl.input.pacbio,
output:
# Outputs of the referenced perl rule with "/perl/" replaced by "/nextflow/".
[perl2nf(x) for x in rules.assembly_single_perl.output]
[perl2nf(x) for x in rules.assembly_pacbio_multi_perl.output]
params:
prefix=rules.assembly_single_perl.params.prefix,
outdir=perl2nf(rules.assembly_single_perl.params.outdir),
prefix=rules.assembly_pacbio_multi_perl.params.prefix,
outdir=perl2nf(rules.assembly_pacbio_multi_perl.params.outdir),
# All PacBio inputs joined with commas into a single --pacbio value.
pacbio=lambda wildcards, input: ",".join(input.pacbio),
prgm=config["nextflow_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
stdout = perl2nf(rules.assembly_single_perl.log.stdout),
stderr = perl2nf(rules.assembly_single_perl.log.stderr),
stdout = perl2nf(rules.assembly_pacbio_multi_perl.log.stdout),
stderr = perl2nf(rules.assembly_pacbio_multi_perl.log.stderr),
shell:
"""
{params.prgm} --kt --a {input.assembly} --o {params.outdir} --pre {params.prefix} --u {input.unpaired} 1> {log.stdout} 2> {log.stderr}
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --pacbio {params.pacbio} 1> {log.stdout} 2> {log.stderr}
"""
# Same inputs as assembly_pacbio_multi_perl, but the PacBio files are written
# to a YAML file and handed to the "nextflow_script" program via -params-file.
rule assembly_pacbio_multi_nextflow_params:
input:
assembly = rules.assembly_pacbio_multi_perl.input.assembly,
pacbio = rules.assembly_pacbio_multi_perl.input.pacbio,
output:
# Outputs of the perl rule with "/perl/" replaced by "/nextflow_params/".
all = [perl2nf_params(x) for x in rules.assembly_pacbio_multi_perl.output],
# The parameter file is generated by the shell command below.
params_file = "params_files/pacbio_multi.yaml"
params:
prefix=rules.assembly_pacbio_multi_perl.params.prefix,
outdir=perl2nf_params(rules.assembly_pacbio_multi_perl.params.outdir),
# One " - <path>" entry per input, joined by a literal backslash-n that
# `echo -e` in the shell command turns into line breaks.
pacbio=lambda wildcards, input: "\\n".join([ " - " + x for x in input.pacbio]),
prgm=config["nextflow_script"]
# NOTE(review): {threads} is not referenced by the shell command below.
threads: 16
benchmark: repeat("benchmarks/nextflow_params/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Same log paths as the perl rule, mapped through perl2nf_params.
stdout = perl2nf_params(rules.assembly_pacbio_multi_perl.log.stdout),
stderr = perl2nf_params(rules.assembly_pacbio_multi_perl.log.stderr),
shell:
# Recreates the output directory, writes the "pacbio:" list to the
# parameter file, then runs the program with that file.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
echo "pacbio:" > {output.params_file}
echo -e "{params.pacbio}" >> {output.params_file}
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} -params-file {output.params_file} 1> {log.stdout} 2> {log.stderr}
"""
# Settings shared by the two PacBio + Nanopore rules that follow.
NAME = "pana"
OF = WD + "assembly_pacbio_nanopore/perl/"


def perl2nf(s):
    """Return `s` with every "/perl/" path segment replaced by "/nextflow/"."""
    return s.replace(r"/perl/", r"/nextflow/")


# Paths that the rules must declare with directory() instead of as plain files:
# the multiqc data directory first, then one ".sort_stats" path per technology.
dir_outputs = [OF + "multiqc_data"] + list(
    expand(OF + NAME + "{t}.sort_stats", t=[".pb", ".ont"])
)
# Runs the program configured as "original_perl_script" on the assembly with
# one PacBio file (-pb) and one Nanopore file (-ont).
rule assembly_pacbio_nanopore_perl:
input:
assembly = ASSEMBLY,
pacbio = "data_prep/pacbio.10000.1.fastq",
nanopore = "data_prep/nanopore.sim.100.fastq",
output:
# Every entry of dir_outputs is declared as a directory output.
dirs = [directory(x) for x in dir_outputs],
# OF + "multiqc" combined with ".err", ".log" and "_report.html".
multiqc = multiext(OF + "multiqc",".err",".log","_report.html"),
# Single file: OF + NAME + ".plot.all.pdf".
plot = multiext(OF + NAME + ".plot.all.","pdf"),
# Per-technology files: OF + NAME + ".pb" / ".ont" combined with each
# (nested) multiext() suffix, plus the matching "_minimap_*1.err" file.
pacbio = multiext(OF+NAME+".pb","1.bam",".bam",*multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats",".stats.err"),*multiext(".sort_stats_bamqc.","err","log")) + [OF+NAME+"_minimap_pb1.err"],
nanopore = multiext(OF + NAME + ".ont","1.bam",".bam", *multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats", ".stats.err"), *multiext(".sort_stats_bamqc.","err","log")) + [OF+NAME+"_minimap_ont1.err"],
params:
prefix=NAME,
outdir=OF,
prgm=config["original_perl_script"]
threads: 16
# The benchmark is repeated config["benchmark_repeats"] times.
benchmark: repeat("benchmarks/perl/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Both log files are placed inside the output directory OF.
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
# The output directory is removed and recreated before the program runs.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} -pb {input.pacbio} -ont {input.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
# Runs the program configured as "nextflow_script" on the same assembly,
# PacBio and Nanopore inputs as assembly_pacbio_nanopore_perl.
rule assembly_pacbio_nanopore_nextflow:
input:
assembly = rules.assembly_pacbio_nanopore_perl.input.assembly,
nanopore = rules.assembly_pacbio_nanopore_perl.input.nanopore,
pacbio = rules.assembly_pacbio_nanopore_perl.input.pacbio,
output:
# File outputs of the perl rule with "/perl/" replaced by "/nextflow/";
# entries of dir_outputs are skipped here and re-declared with directory() below.
[perl2nf(x) for x in rules.assembly_pacbio_nanopore_perl.output if x not in dir_outputs],
[directory(perl2nf(x)) for x in dir_outputs],
params:
prefix=rules.assembly_pacbio_nanopore_perl.params.prefix,
outdir=perl2nf(rules.assembly_pacbio_nanopore_perl.params.outdir),
prgm=config["nextflow_script"]
# NOTE(review): {threads} is not referenced by the shell command below.
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
# Same log paths as the perl rule, mapped through perl2nf.
stdout = perl2nf(rules.assembly_pacbio_nanopore_perl.log.stdout),
stderr = perl2nf(rules.assembly_pacbio_nanopore_perl.log.stderr),
shell:
# The output directory is removed and recreated before the program runs.
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --pacbio {input.pacbio} --nanopore {input.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
OF = WD+"assembly_single_only/perl/"
OF = WD+"assembly_pacbio_only/perl/"
perl2nf = lambda s: s.replace(r"/perl/",r"/nextflow/")
NAME = "se"
NAME = "pa"
rule assembly_single_perl:
rule assembly_pacbio_perl:
input:
assembly = ASSEMBLY,
unpaired = "data_prep/unpaired.100.1.fastq"
pacbio = "data_prep/pacbio.10000.1.fastq",
output:
multiext(OF,
NAME+".bam",
*multiext(NAME+".sort.bam","",*multiext(".cov-hist", "",".err", ".log", ".pdf", ".plot.r")),
*multiext(NAME+".sort_stats_bamqc.","err","log"),
NAME+".unpaired1.bam",
*multiext(NAME+"_bwa_index.","err","log"),
NAME+"_bwa_mem_unpaired1.err",
) + multiext(OF+NAME+".sort_stats/", "genome_results.txt","qualimapReport.html")
sort_stats_pb = multiext(OF+NAME+".pb.sort_stats/", "genome_results.txt", "qualimapReport.html"),
pacbio = multiext(OF+NAME+".pb","1.bam",".bam",*multiext(".sort.bam","",*multiext(".cov-hist","",".pdf"), ".stats",".stats.err"),*multiext(".sort_stats_bamqc.","err","log")) + [OF+NAME+"_minimap_pb1.err"],
params:
prefix=NAME,
outdir=OF,
prgm=config["original_perl_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
stdout = "{folder}/{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}/{name}.stderr.log".format(folder=OF, name=NAME)
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
"""
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} -u {input.unpaired} 1> {log.stdout} 2> {log.stderr}
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} -pb {input.pacbio} 1> {log.stdout} 2> {log.stderr}
"""
rule assembly_single_nextflow:
# Runs the program configured as "nextflow_script" with --pacbio on a single
# PacBio input.
# NOTE(review): this span contains paired lines that refer to
# assembly_single_perl and to assembly_pacbio_perl; presumably these are the
# removed and added sides of a diff. Confirm which side is current.
rule assembly_pacbio_nextflow:
input:
assembly = rules.assembly_single_perl.input.assembly,
unpaired = rules.assembly_single_perl.input.unpaired,
assembly = rules.assembly_pacbio_perl.input.assembly,
pacbio= rules.assembly_pacbio_perl.input.pacbio,
output:
# Outputs of the referenced perl rule with "/perl/" replaced by "/nextflow/".
[perl2nf(x) for x in rules.assembly_single_perl.output]
[perl2nf(x) for x in rules.assembly_pacbio_perl.output]
params:
prefix=rules.assembly_single_perl.params.prefix,
outdir=perl2nf(rules.assembly_single_perl.params.outdir),
prefix=rules.assembly_pacbio_perl.params.prefix,
outdir=perl2nf(rules.assembly_pacbio_perl.params.outdir),
prgm=config["nextflow_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
stdout = perl2nf(rules.assembly_single_perl.log.stdout),
stderr = perl2nf(rules.assembly_single_perl.log.stderr),
stdout = perl2nf(rules.assembly_pacbio_perl.log.stdout),
stderr = perl2nf(rules.assembly_pacbio_perl.log.stderr),
shell:
"""
{params.prgm} --kt --a {input.assembly} --o {params.outdir} --pre {params.prefix} --u {input.unpaired} 1> {log.stdout} 2> {log.stderr}
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --pacbio {input.pacbio} 1> {log.stdout} 2> {log.stderr}
"""
OF = WD+"assembly_paired_multi/perl/"
perl2nf = lambda s: s.replace(r"/perl/",r"/nextflow/")
perl2nf_params = lambda s: s.replace(r"/perl/",r"/nextflow_params/")
NAME = "pepepe"
rule assembly_paired_multi_perl:
......@@ -20,12 +21,15 @@ rule assembly_paired_multi_perl:
outdir=OF,
files=lambda wildcards, input: ["-p "+",".join(x) for x in zip(input.paired1,input.paired2)],
prgm=config["original_perl_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/perl/{}.tsv".format(NAME), config["benchmark_repeats"])
log:
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -a {input.assembly} -o {params.outdir} -pre {params.prefix} -t {threads} {params.files} 1> {log.stdout} 2> {log.stderr}
"""
......@@ -39,14 +43,43 @@ rule assembly_paired_multi_nextflow:
params:
prefix=rules.assembly_paired_multi_perl.params.prefix,
outdir=perl2nf(rules.assembly_paired_multi_perl.params.outdir),
files=lambda wildcards, input: [",".join(x) for x in zip(input.paired1,input.paired2)],
files=lambda wildcards, input: ",".join([",".join(x) for x in zip(input.paired1,input.paired2)]),
prgm = config["nextflow_script"]
threads: 1
threads: 16
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
log: