Commit 2874ec5a authored by Oliver Schwengers's avatar Oliver Schwengers 💬
Browse files

Merge branch 'long' into 'master'

Added support for longer genomes

See merge request !2
parents 32cd8a0c 0ee93f84
......@@ -589,10 +589,10 @@ if (coverageHistogram) {
/*
* Print results
*/
chPeakAsInteger = chPeak.map { num -> [num[0], Integer.parseInt(num[1])] }
chTotalNucleotidesAsInteger = chTotalNucleotidesResults.map { num -> [num[0], Integer.parseInt(num[1])] }
chGenomeSizeEstimationResults = chTotalNucleotidesAsInteger
.join(chPeakAsInteger)
chPeakAsLong = chPeak.map { num -> [num[0], Long.parseLong(num[1])] }
chTotalNucleotidesAsLong = chTotalNucleotidesResults.map { num -> [num[0], Long.parseLong(num[1])] }
chGenomeSizeEstimationResults = chTotalNucleotidesAsLong
.join(chPeakAsLong)
.join(chGenomeEstimationSortedBams)
.map { res -> [res[0], res[3], res[1], res[2], res[1] / res[2]] }
......@@ -605,7 +605,7 @@ if (coverageHistogram) {
}
} // closure rfp
chAssemblyGenomeSize.map { num -> Integer.parseInt(num) }.combine(chGenomeSizeEstimationResults).map { res ->
chAssemblyGenomeSize.map { num -> Long.parseLong(num) }.combine(chGenomeSizeEstimationResults).map { res ->
assemblyLength = res[0]
tech = res[1]
file = res[2]
......@@ -628,8 +628,6 @@ if (coverageHistogram) {
"""
}
return result
}.set { chResults }
chResults.view()
}.collectFile(name: "${prefix}.results.txt", newLine: false, storeDir: output)
} //fi: genomeSizeEstimation
} //fi: coverageHistogram
......@@ -7,6 +7,7 @@ NFS = config["nextflow_script"]
OPS = config["original_perl_script"]
DATA = config["datadir"]
ASSEMBLY = config["assembly"]
ASSEMBLY_BIG = config["assembly_big"]
WD = config["workdir"]+"/"
include: "snakemake_workflows/bam_prep.smk"
......@@ -78,6 +79,7 @@ rule all:
rules.bams_illumina_paired_pacbio_nanopore_perl.output,
rules.bams_illumina_paired_pacbio_nanopore_nextflow.output,
rules.bams_with_assembly_illumina_paired_pacbio_nanopore_nextflow.output,
rules.assembly_big_single_pacbio_nextflow.output,
rule data_prep:
......@@ -126,7 +128,7 @@ rule nanosim:
output:
"data_prep/nanopore.sim.{number}.fastq"
conda: "envs/nanosim.yml"
shadow: "full"
shadow: "minimal"
threads: 8
shell:
"""
......
......@@ -3,8 +3,9 @@ benchmark_repeats: 3
workdir: "results"
original_perl_script: "perl ../original_wf/backmap/backmap.pl"
nextflow_script: "nextflow ../nextflow/backmap.nf -c nextflow.config --threads 16"
assembly: "../dataset/dgal_ra_pb-target-and-other_ill-confilter.blobfilter.rmmt_sspace-lr3_lrgc3_pg3_pilon_3.fasta"
datadir: "../dataset/"
assembly: "dataset/dgal_ra_pb-target-and-other_ill-confilter.blobfilter.rmmt_sspace-lr3_lrgc3_pg3_pilon_3.fasta"
assembly_big: "dataset/dgal.big.fasta"
datadir: "dataset/"
unpaired: dgal.confilter.fq
paired_1: dgal_1.paired.confilter.fq
paired_2: dgal_2.paired.confilter.fq
......
../dataset/
\ No newline at end of file
......@@ -3,4 +3,6 @@ channels:
- bioconda
- defaults
dependencies:
- python=3.7
- nanosim
- scikit-learn=0.20
# Folder (below the work dir) that holds the outputs of the Perl run of this
# scenario; the Nextflow rules derive their own paths from it.
OF = WD+"assembly_single_pacbio/perl/"


def perl2nf(s):
    # Rewrite a path from the "/perl/" tree into the "/nextflow/" tree.
    return s.replace(r"/perl/", r"/nextflow/")


def perl2nf_big(s):
    # Rewrite a path from the "/perl/" tree into the "/nextflow_big/" tree.
    return s.replace(r"/perl/", r"/nextflow_big/")


# Short scenario name used as a file-name prefix for outputs.
NAME = "sepb"

# Outputs that are directories; the rules exclude these from the plain file
# outputs and declare them separately through directory().
dir_outputs = [
    OF + "multiqc_data",
    OF + NAME + ".sort_stats",
]
......@@ -41,6 +42,7 @@ rule assembly_single_pacbio_nextflow:
output:
[perl2nf(x) for x in rules.assembly_single_pacbio_perl.output if x not in dir_outputs],
[directory(perl2nf(x)) for x in dir_outputs],
OF+NAME+".results.txt"
params:
prefix=rules.assembly_single_pacbio_perl.params.prefix,
outdir=perl2nf(rules.assembly_single_pacbio_perl.params.outdir),
......@@ -58,3 +60,28 @@ rule assembly_single_pacbio_nextflow:
{params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --single-end {input.unpaired} --pacbio {input.pacbio} 1> {log.stdout} 2> {log.stderr}
"""
rule assembly_big_single_pacbio_nextflow:
    # Run the Nextflow backmap workflow against the large assembly
    # (ASSEMBLY_BIG) with the same single-end and PacBio reads as
    # assembly_single_pacbio_perl. All outputs and logs are the Perl rule's
    # paths remapped from ".../perl/" to ".../nextflow_big/" by perl2nf_big.
    input:
        assembly = ASSEMBLY_BIG,
        unpaired = rules.assembly_single_pacbio_perl.input.unpaired,
        pacbio = rules.assembly_single_pacbio_perl.input.pacbio,
    output:
        [perl2nf_big(x) for x in rules.assembly_single_pacbio_perl.output if x not in dir_outputs],
        [directory(perl2nf_big(x)) for x in dir_outputs],
        # The workflow is started with --output {params.outdir}, so the results
        # file has to be declared inside the nextflow_big tree. Declaring the
        # bare OF path would point into the Perl tree and duplicate the output
        # that assembly_single_pacbio_nextflow already declares.
        perl2nf_big(OF+NAME+".results.txt")
    params:
        prefix=rules.assembly_single_pacbio_perl.params.prefix,
        outdir=perl2nf_big(rules.assembly_single_pacbio_perl.params.outdir),
        prgm=config["nextflow_script"]
    threads: 16
    conda: "../envs/nextflow.yml"
    # NOTE(review): this benchmark path does not contain "big"; if
    # assembly_single_pacbio_nextflow uses the same template, both rules write
    # to one benchmark file -- confirm and separate if so.
    benchmark: repeat("benchmarks/nextflow/{}.tsv".format(NAME), config["benchmark_repeats"])
    log:
        stdout = perl2nf_big(rules.assembly_single_pacbio_perl.log.stdout),
        stderr = perl2nf_big(rules.assembly_single_pacbio_perl.log.stderr),
    shell:
        # Start from an empty output directory so every (repeated) run is clean.
        """
        rm -rf {params.outdir};
        mkdir -p {params.outdir};
        {params.prgm} --keep-temporary --assembly {input.assembly} --output {params.outdir} --prefix {params.prefix} --single-end {input.unpaired} --pacbio {input.pacbio} 1> {log.stdout} 2> {log.stderr}
        """
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment