Commit a1478fa6 authored by Raphael Müller's avatar Raphael Müller
Browse files

added tests for bam processing

parent 50229cf6
......@@ -9,6 +9,8 @@ DATA = config["datadir"]
ASSEMBLY = config["assembly"]
WD = config["workdir"]+"/"
include: "snakemake_workflows/bam_prep.smk"
include: "snakemake_workflows/bams_illumina_pacbio_nanopore.smk"
include: "snakemake_workflows/nanopore.smk"
include: "snakemake_workflows/nanopore_multi.smk"
include: "snakemake_workflows/pacbio_multi.smk"
......@@ -73,6 +75,8 @@ rule all:
rules.assembly_single_paired_pacbio_nextflow.output,
rules.assembly_single_paired_perl.output,
rules.assembly_single_paired_nextflow.output,
rules.bams_illumina_paired_pacbio_nanopore_perl.output,
rules.bams_illumina_paired_pacbio_nanopore_nextflow.output,
#test_cases = [os.path.join(config["workdir"],key,method,value["output_file"]+value["extension"]) for key,value in config["cases"].items() for method in ["original", "nextflow"]]
......
......@@ -178,9 +178,9 @@ println "minimapNanoporeOptions $minimapNanoporeOptions "
if(bamAvailable){
//fill bam channels for sorting
chIlluminaBamMerge = (illuminaBamsAvailable) ? Channel.from(illuminaBamFiles).map{files -> files.split(fileSeparator)}.flatten().map{tuple val("Illumina"), file(it)} : Channel.empty()
chPacbioBamMerge = (pacbioBamsAvailable) ? Channel.from(pacbioBamFiles).map { files -> files.split(fileSeparator)}.flatten().map{tuple val("Pacbio"), file(it)} : Channel.empty()
chNanoporeBamMerge = (nanoporeBamsAvailable) ? Channel.from(nanoporeBamFiles).map { files -> files.split(fileSeparator)}.flatten().map{tuple val("Nanopore"), file(it)} : Channel.empty()
chIlluminaBamMerge = (illuminaBamsAvailable) ? Channel.from(illuminaBamFiles).map{files -> files.split(fileSeparator)}.flatten().map{ file(it)} : Channel.empty()
chPacbioBamMerge = (pacbioBamsAvailable) ? Channel.from(pacbioBamFiles).map { files -> files.split(fileSeparator)}.flatten().map{file(it)} : Channel.empty()
chNanoporeBamMerge = (nanoporeBamsAvailable) ? Channel.from(nanoporeBamFiles).map { files -> files.split(fileSeparator)}.flatten().map{file(it)} : Channel.empty()
}else if(assemblyAvailable){ //fi: bamAvailable
chBwaIndexGenome = (pairedAvailable || unpairedAvailable) ? Channel.fromPath(assembly) : Channel.empty()
chUnpairedReadMapping = (unpairedAvailable) ? Channel.from(singleEnd).map{files -> files.split(fileSeparator)}.flatten().map{file it}.collate(1) : Channel.empty()
......
rule illumina_bams:
input:
fasta = "data_prep/unpaired.{divider}.{offset}.fastq",
genome = ASSEMBLY,
output:
"data_prep_bams/illumina.{divider,[0-9]+}.{offset,[0-9]+}.bam"
shadow: "shallow"
threads: 16
shell:
"""
bwa index -p index {input.genome}
bwa mem -t {threads} -a -c 10000 index {input.fasta} | samtools view -1 -b - > {output}
"""
rule pacbio_bams:
input:
fasta = "data_prep/pacbio.{divider}.{offset}.fastq",
genome = ASSEMBLY,
output:
"data_prep_bams/pacbio.{divider,[0-9]+}.{offset,[0-9]+}.pb.bam"
shadow: "shallow"
threads: 16
shell:
"""
minimap2 -H -x map-pb -a -t {threads} {input.genome} {input.fasta} | samtools view -1 -b - > {output}
"""
rule nanopore_bams:
input:
fasta = "data_prep/nanopore.sim.{num}.fastq",
genome = ASSEMBLY,
output:
"data_prep_bams/nanopore.{num,[0-9]+}.ont.bam"
shadow: "shallow"
threads: 16
shell:
"""
minimap2 -x map-ont -a -t {threads} {input.genome} {input.fasta} | samtools view -1 -b - > {output}
"""
OF = WD+"bams_illumina_pacbio_nanopore/perl/"
perl2nf = lambda s: s.replace(r"/perl/",r"/nextflow/")
NAME = "ba_illpana"
dir_outputs = [OF+"multiqc_data",*expand(OF+NAME+"{t}.sort_stats",t=["",".pb",".ont"])]
rule bams_illumina_paired_pacbio_nanopore_perl:
input:
illumina = "data_prep_bams/illumina.100.1.bam",
pacbio = "data_prep_bams/pacbio.1000.1.pb.bam",
nanopore = "data_prep_bams/nanopore.1000.ont.bam"
output:
multiext(OF,
"illumina.100.1.sort.bam.cov-hist",
"illumina.100.1.sort.bam.cov-hist.err",
"illumina.100.1.sort.bam.cov-hist.log",
"illumina.100.1.sort.bam.cov-hist.pdf",
"illumina.100.1.sort.bam.cov-hist.plot.r",
"illumina.100.1.sort_stats_bamqc.err",
"illumina.100.1.sort_stats_bamqc.log",
"multiqc.err",
"multiqc.log",
"multiqc_report.html",
"nanopore.1000.ont.sort.bam.cov-hist",
"nanopore.1000.ont.sort.bam.cov-hist.err",
"nanopore.1000.ont.sort.bam.cov-hist.log",
"nanopore.1000.ont.sort.bam.cov-hist.pdf",
"nanopore.1000.ont.sort.bam.cov-hist.plot.r",
"nanopore.1000.ont.sort_stats_bamqc.err",
"nanopore.1000.ont.sort_stats_bamqc.log",
"pacbio.1000.1.pb.sort.bam.cov-hist",
"pacbio.1000.1.pb.sort.bam.cov-hist.err",
"pacbio.1000.1.pb.sort.bam.cov-hist.log",
"pacbio.1000.1.pb.sort.bam.cov-hist.pdf",
"pacbio.1000.1.pb.sort.bam.cov-hist.plot.r",
"pacbio.1000.1.pb.sort_stats_bamqc.err",
"pacbio.1000.1.pb.sort_stats_bamqc.log",
"plot.all.pdf",
"plot.all.r",
"plot.all.r.err",
"plot.all.r.log"),
directory(OF+"illumina.100.1.sort_stats/"),
directory(OF+"multiqc_data/"),
directory(OF+"nanopore.1000.ont.sort_stats/"),
directory(OF+"pacbio.1000.1.pb.sort_stats/"),
benchmark: repeat("benchmarks/perl/{}.tsv".format(NAME), config["benchmark_repeats"])
threads: 16
params:
prefix=NAME,
outdir=OF,
prgm=config["original_perl_script"]
log:
stdout = "{folder}{name}.stdout.log".format(folder=OF, name=NAME),
stderr = "{folder}{name}.stderr.log".format(folder=OF, name=NAME)
shell:
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} -kt -v -b {input.illumina} -b {input.pacbio} -b {input.nanopore} -o {params.outdir} -sort -t {threads} 1> {log.stdout} 2> {log}.stderr
"""
rule bams_illumina_paired_pacbio_nanopore_nextflow:
input:
illumina = "data_prep_bams/illumina.100.1.bam",
pacbio = "data_prep_bams/pacbio.1000.1.pb.bam",
nanopore = "data_prep_bams/nanopore.1000.ont.bam"
output:
perl2nf(OF+"ba_illpana.bam"),
perl2nf(OF+"ba_illpana.ont.bam"),
perl2nf(OF+"ba_illpana.ont.sort.bam"),
perl2nf(OF+"ba_illpana.ont.sort.bam.cov-hist"),
perl2nf(OF+"ba_illpana.ont.sort.bam.cov-hist.pdf"),
perl2nf(OF+"ba_illpana.ont.sort.bam.stats"),
perl2nf(OF+"ba_illpana.ont.sort.bam.stats.err"),
perl2nf(OF+"ba_illpana.ont.sort_stats_bamqc.err"),
perl2nf(OF+"ba_illpana.ont.sort_stats_bamqc.log"),
perl2nf(OF+"ba_illpana.pb.bam"),
perl2nf(OF+"ba_illpana.pb.sort.bam"),
perl2nf(OF+"ba_illpana.pb.sort.bam.cov-hist"),
perl2nf(OF+"ba_illpana.pb.sort.bam.cov-hist.pdf"),
perl2nf(OF+"ba_illpana.pb.sort.bam.stats"),
perl2nf(OF+"ba_illpana.pb.sort.bam.stats.err"),
perl2nf(OF+"ba_illpana.pb.sort_stats_bamqc.err"),
perl2nf(OF+"ba_illpana.pb.sort_stats_bamqc.log"),
perl2nf(OF+"ba_illpana.plot.all.pdf"),
perl2nf(OF+"ba_illpana.sort.bam"),
perl2nf(OF+"ba_illpana.sort.bam.cov-hist"),
perl2nf(OF+"ba_illpana.sort.bam.cov-hist.pdf"),
perl2nf(OF+"ba_illpana.sort.bam.stats"),
perl2nf(OF+"ba_illpana.sort.bam.stats.err"),
perl2nf(OF+"ba_illpana.sort_stats_bamqc.err"),
perl2nf(OF+"ba_illpana.sort_stats_bamqc.log"),
perl2nf(OF+"multiqc.err"),
perl2nf(OF+"multiqc.log"),
perl2nf(OF+"multiqc_report.html"),
directory(perl2nf(OF+"ba_illpana.ont.sort_stats/")),
directory(perl2nf(OF+"ba_illpana.pb.sort_stats/")),
directory(perl2nf(OF+"ba_illpana.sort_stats/")),
directory(perl2nf(OF+"multiqc_data/")),
benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
threads: 16
params:
prefix=NAME,
outdir=perl2nf(OF),
prgm=config["nextflow_script"]
log:
stdout = perl2nf("{folder}{name}.stdout.log".format(folder=OF, name=NAME)),
stderr = perl2nf("{folder}{name}.stderr.log".format(folder=OF, name=NAME))
shell:
"""
rm -rf {params.outdir};
mkdir -p {params.outdir};
{params.prgm} --keep-temporary --output {params.outdir} --prefix {params.prefix} --illumina-bam-files {input.illumina} --pacbio-bam-files {input.pacbio} --nanopore-bam-files {input.nanopore} 1> {log.stdout} 2> {log.stderr}
"""
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment