Commit 954176c8 authored by Raphael Müller's avatar Raphael Müller
Browse files

beauty treatment

parent b4cc9a46
#!/usr/bin/env groovy
/*
* Check if files are specified and create channels for input files
* If files are not specified processes will not start
*/
/**
* backmap.pl v0.3
*
* Description:
* Automatic mapping of paired, unpaired, PacBio and Nanopore reads to an
* assembly, execution of qualimap bamqc, multiqc and estimation of genome size
* from mapped nucleotides and peak coverage.
* The tools bwa, minimap2, samtools, qualimap, multiqc, bedtools and Rscript need to be
* in your $PATH.
*
* Usage:
* backmap.pl [-a <assembly.fa> {-p <paired_1.fq>,<paired_2.fq> | -u <unpaired.fq> |
* -pb <pacbio.fq> | -ont <ont.fq> } | -b <mapping.bam>]
*
* Mandatory:
* -a STR Assembly were reads should mapped to in fasta format
* AND AT LEAST ONE OF
* -p STR Two fastq files with paired Illumina reads comma sperated
* -u STR Fastq file with unpaired Illumina reads
* -pb STR Fasta or fastq file with PacBio reads
* -ont STR Fasta or fastq file with Nanopore reads
* OR
* -b STR Bam file to calculate coverage from
* Skips read mapping
* Overrides -nh
* Technologies will recognized correctly if filenames end with
* .pb(.sort).bam or .ont(.sort).bam for PacBio and Nanopore respectively.
* Otherwise they are assumed to be from Illumina.
* All mandatory options except of -a can be specified multiple times
*
* Options: [default]
* -o STR Output directory [.]
* Will be created if not existing
* -t INT Number of parallel executed processes [1]
* Affects bwa mem, samtools sort, qualimap bamqc
* -pre STR Prefix of output files if -a is used [filename of -a]
* -sort Sort the bam file(s) (-b) [off]
* -nq Do not run qualimap bamqc [off]
* -nh Do not create coverage histogram [off]
* Implies -ne
* -ne Do not estimate genome size [off]
* -kt Keep temporary bam files [off]
* -bo STR Options passed to bwa [-a -c 10000]
* -mo STR Options passed to minimap [PacBio: -H -x map-pb; ONT: -x map-ont]
* -qo STR Options passed to qualimap [none]
* Pass options with quotes e.g. -bo "<options>"
* -v Print executed commands to STDERR [off]
* -dry-run Only print commands to STDERR instead of executing [off]
*
* -h or -help Print this help and exit
* -version Print version number and exit
*
*/
VERSION = "v0.4"
HELP = """
backmap $VERSION
......@@ -128,13 +69,7 @@ Optional Options: [default]
--version Print version number and exit
--debug Print inner variables
"""
// Files:
// Genome Assembly file; default: None
// Single end read files; default: None
// Paired end read files; default: None
// Pacbio read files; default: None
// Oxford Nanopore files; default: None
params["assembly"] = 'NO_FILE'
params["single-end"] = 'NO_FILE'
......@@ -145,10 +80,8 @@ params["illumina-bam-files"] = 'NO_FILE'
params["pacbio-bam-files"] = 'NO_FILE'
params["nanopore-bam-files"] = 'NO_FILE'
// File separator; default: ','
params["file-separator"] = ","
// Output files & Prefix
params["output"] = './Results/'
params["prefix"] = "tbg_backmap"
params["threads"] = 1
......@@ -167,7 +100,6 @@ params["minimap-pacbio-options"] = "-H"
params["minimap-nanopore-options"] = ""
params["qualimap-options"] = ""
// Variables
assembly = params["assembly"]
singleEnd = params["single-end"]
pairedEnd = params["paired-end"]
......@@ -270,23 +202,11 @@ if(printVersion){
exit 1
}
//folders
String BWAINDEX = "${output}/bwa_index"
// BAM files
params.bam = 'NO_FILE'
if (params.bam != 'NO_FILE'){
Channel.from(bams).map { files -> files.split(fileSeparator)}.flatten().map{file it}.collate(1)
.set{bam_file}
bamAvailable=true
} else {
Channel.empty()
.set{bam_file}
}
/*
* Starting calculations
* Process declarations
*/
if(bamAvailable){
......@@ -412,6 +332,7 @@ process mergeIlluminaBamFiles {
"""
}//process: mergeIlluminaBamFiles
// Merging of PacBio mapped reads
process mergePacbioBamFiles {
if(keepTemporaryFiles){
publishDir output, mode: 'copy'
......@@ -427,6 +348,7 @@ process mergePacbioBamFiles {
"""
}//process: mergePacbioBamFiles
// Merging of PacBio mapped reads
process mergeNanoporeBamFiles {
if(keepTemporaryFiles){
publishDir output, mode: 'copy'
......@@ -442,7 +364,7 @@ process mergeNanoporeBamFiles {
"""
}//process: mergePacbioBamFiles
// Sorting all Bams
// Sorting all Bam files
process sortAllBamFiles {
publishDir output, mode: 'copy'
......@@ -457,7 +379,6 @@ process sortAllBamFiles {
"""
}//process: sortAllBamFiles
// Qualimap QC of sorted BAM files
if (qualityControl){
process qualimap {
......@@ -476,7 +397,6 @@ if (qualityControl){
"""
}//process: qualimap
//
// Multiqc QC with qualimap results
process multiqc {
......@@ -484,10 +404,8 @@ if (qualityControl){
cpus 1
input:
file qualis from chMultiqcQualimapResults.collect()
output:
file "*" into chMultiqcOut
//
script:
"""
multiqc -f -s -m qualimap -o . . > multiqc.log 2> multiqc.err
......@@ -511,7 +429,7 @@ if(coverageHistogram){
"""
}//process bedtoolsGenomecov
// R script for coverage
// R script for single coverage
process RcoveragePlotSingle {
publishDir output, mode: 'copy'
cpus 1
......@@ -562,13 +480,11 @@ if(coverageHistogram){
library(dplyr)
options(scipen=999)
# Set up variable to control command line arguments
tech_files <- c("${tech_cov.join('","')}")
techs <- tech_files[seq(1,length(tech_files),2)]
files <- tech_files[seq(2,length(tech_files),2)]
df <- data.frame()
for(i in 1:length(files)){
df.current <- read.table(files[i], sep = "\\t")
......@@ -588,7 +504,7 @@ if(coverageHistogram){
scale_x_log10() +
annotation_logticks(base=10, sides="b") +
scale_color_discrete(name = "", breaks = techs, labels = legend.labels) +
theme(legend.position = c(.95, .95), legend.justification = c("right", "top"), legend.box.just = "right", legend.margin = margin(6, 6, 6, 6))+
theme(legend.position = c(.95, .95), legend.justification = c("right", "top"), legend.box.just = "right", legend.margin = margin(6, 6, 6, 6)) +
ggtitle(maintitle)
ggsave("${prefix}.plot.all.pdf", plot = p, device = "pdf")
......@@ -596,6 +512,7 @@ if(coverageHistogram){
}//process: RMultiCoveragePlot
if(genomeSizeEstimation){
//calculate samtools stats
process samtoolsStats {
publishDir output, mode: 'copy'
cpus 1
......@@ -610,6 +527,7 @@ if(coverageHistogram){
"""
}//process samtoolsStats
// calculate peak
process peakCalculation {
cpus 1
input:
......@@ -622,6 +540,7 @@ if(coverageHistogram){
"""
}//process peakCalculation
//calculate total number of mapped nucleotides
process totalNucleotideCalculation {
cpus 1
input:
......@@ -635,8 +554,9 @@ if(coverageHistogram){
}//process totalNucleotideCalculation
if(assemblyAvailable){
chAssemblyNucleotides = Channel.fromPath(assembly)
//count number of nucleotides of given assembly
chAssemblyNucleotides = Channel.fromPath(assembly)
process assemblyNucleotideCounting {
cpus 1
input:
......@@ -659,8 +579,10 @@ if(coverageHistogram){
//Print results
/*
* Print results
*/
//closure for rounding in nice format
rfp = {
num = it
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment