Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
0 results Searching
Select Git revision
Loading items
Show changes

Commits on Source 8

6 files
+ 43
8
Compare changes
  • Side-by-side
  • Inline

Files

Original line number Diff line number Diff line
# Module manifest for the hmmscan analysis against sORFdb small protein families.

# The name of the module. Needed for the list-analyses option and for custom
# configurations and custom profiles.
name: 'hmmer_sorfdb'

# Short description of the analysis.
info: 'hmmscan analysis against sORFdb small protein families'

# The name of the script for the analysis step. Must take a --fasta parameter.
analysis:
    script: 'run_hmmer.py'
    parameters:
        # HMM database the protein sequences are scanned against.
        database: 'sorfdb/sorfdb.1.0.hmm'
        # Presumably forwarded to run_hmmer.py as --ga: use the profiles'
        # curated GA gathering cutoffs instead of a fixed E-value threshold.
        ga: 'True'
    execution:
        cluster:
            # Sequences per chunk when the job is split on the cluster.
            chunksize: 200
    container:
        docker: 'proteogenomicsworkflow/hmmer:3.4'
        singularity: 'proteogenomicsworkflow/hmmer:3.4'

# The name of the result to json converter script. Must take one parameter, the
# result file from the analysis script.
converter:
    script: 'convert_hmmer.py'
    parameters:
        # Database cross-reference label attached to each reported hit.
        dbxref: 'sORFdb'
Original line number Diff line number Diff line
# Profile manifest: runs the sORFdb HMM small-protein family search.
name: 'bacteria-sorfdb'
info: 'Profile for sORFdb HMM family search'
# Modules executed by this profile. Empty values presumably mean each module
# runs with the defaults from its own manifest — confirm against the runner.
modules:
  include_sequence:
  hmmer_sorfdb:
Original line number Diff line number Diff line
@@ -11,12 +11,15 @@ parser.add_argument('--fasta', '-f', required=True, help='A fasta file with amin
parser.add_argument('--database', '-d', required=True, help='Database to search in')
parser.add_argument('--output', '-o', required=True, help='The result directory')
parser.add_argument('--evalue', '-e', default='0.0001', help='Evalue cutoff')
# BUG FIX: argparse's type=bool is a trap -- bool() of any non-empty string
# (including the literal 'False') is True, so '--ga False' silently enabled
# the GA cutoffs. Parse the common truthy spellings explicitly instead; the
# CLI shape ('--ga <value>') and the default (False) are unchanged.
parser.add_argument('--ga', '-g', default=False,
                    type=lambda v: str(v).strip().lower() in ('1', 'true', 'yes', 'y'),
                    help="Use profile's GA gathering cutoffs to set all thresholding")
args = parser.parse_args()

print('mkdir -p ' + args.output)

# Threshold selection: either the profiles' curated GA gathering cutoffs
# (--cut_ga) or a fixed E-value cutoff -- hmmscan treats these as alternatives.
cutoff = " --cut_ga " if args.ga else f" -E {args.evalue} "
print(hmmscan_tool + 
        " -E " + args.evalue + 
        cutoff +
        " -o " + args.output + "/hmmscan.out " + 
        " --tblout " + args.output + "/tblout.tsv "  + 
        " --domtblout " + args.output + "/domtblout.tsv " + 
Original line number Diff line number Diff line
@@ -6,13 +6,14 @@ from copy import deepcopy
import collections
import shutil
import sys
from collections.abc import MutableMapping

# taken from https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys
def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into (joined-key, value) items.

    Nested keys are joined with *sep*, e.g. {'a': {'b': 1}} yields key 'a_b'.
    NOTE(review): the diff hunk ends before the function's return statement;
    the remainder (presumably ``return dict(items)``) lies outside this view.
    """
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        # collections.MutableMapping was removed in Python 3.10; the ABC lives
        # in collections.abc now. The diff residue left both the old and new
        # isinstance lines in place -- keep only the fixed one.
        if isinstance(v, MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
@@ -248,7 +249,7 @@ beforeScript_modul_config_template = Template('''
    )

beforeScript_norm_config_template = Template('''
    withName:normalizing_fasta{
    withName:normalize_fasta{
        ${beforeScript}
    }
    '''
@@ -419,9 +420,9 @@ def generate_nextflow_config(execution):
            config['beforeScript'] = "beforeScript = 'export PS1=; source " + execution['venv'] + "/bin/activate'"
            
            if execution['fetch_dbxrefs']:
                process_names_list = Template('convert_${id}_to_json|${id}_restore_headers_json|retrieve_dbxrefs_for_${id}').substitute(config).split('|')
                process_names_list = Template('convert_${id}_to_json|${id}_restore_ids|retrieve_dbxrefs_for_${id}').substitute(config).split('|')
            else:
                process_names_list = Template('convert_${id}_to_json|${id}_restore_headers_json').substitute(config).split('|')
                process_names_list = Template('convert_${id}_to_json|${id}_restore_ids').substitute(config).split('|')
                
            fragments.append(analysis_config_template.substitute(config))
            for process in process_names_list:
+0 −3
Original line number Diff line number Diff line
from setuptools import setup
# this is only necessary when not using setuptools/distribute
# NOTE(review): sphinx.setup_command (BuildDoc) is deprecated and was removed
# in Sphinx 7; the 'build_sphinx' setup.py command should be replaced by
# invoking sphinx-build directly -- confirm against the project's Sphinx pin.
# The diff header above ("+0 -3") suggests these lines were being removed.
from sphinx.setup_command import BuildDoc
cmdclass = {'build_sphinx': BuildDoc}

setup(
  setup_requires=['pbr'],