#!/usr/bin/env python3
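"""Command-line interface of PSOT: make bioinformatic observations on amino acid sequences.

Example invocation (assuming the script is run directly; an installed console
script may be named differently):

    python main.py analyze --fasta proteins.fasta --output results/
"""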
import argparse
import copy
import json
import os
import shutil
import subprocess
import sys
import tempfile
from psot.config import load_config
from psot.nextflow import setup_execution_directory, execute_analysis


def main():
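    """Parse the command line, load the configuration and dispatch to the selected subcommand."""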
    parser = argparse.ArgumentParser(description='Make bioinformatic observations on amino acid sequences')
    parser.set_defaults(func=show_help)

    subparsers = parser.add_subparsers()
    info_parser = subparsers.add_parser('info')
    info_parser.add_argument('--listanalyses', '-l', action='store_true', help='Show available analysis steps')
    info_parser.set_defaults(func=info)

    analyze_parser = subparsers.add_parser('analyze')

    analyze_parser.add_argument('--fasta', '-f', required=True, help='A FASTA file with amino acid sequences')
    analyze_parser.add_argument('--output', '-o', required=True, help='The output directory for the JSON documents')
    analyze_parser.add_argument('--profile', '-p', default='fast', help='The profile to use')
    analyze_parser.add_argument('--live', '-l', action='store_true', help='Report results as they are computed instead of only at the end of the computation. Live results will be available in $output/live.')
    analyze_parser.add_argument('--config', '-c', help='The config to use')
    analyze_parser.add_argument('--fetch_informations', '-i', action='store_true', help='Fetch information')
    analyze_parser.add_argument('--debug', '-d', action='store_true', help='Debug mode: the computation directory will not be removed after the computation')
    analyze_parser.add_argument('--execution_dir', '-e', help='Use the specified execution directory and do not delete it after the computation')
    analyze_parser.add_argument('--use_cluster', '-C', action='store_true', help='Use compute cluster for execution')
    
    images = analyze_parser.add_mutually_exclusive_group()
    images.add_argument('--docker', '-D', action='store_true', help='Use docker image for computation')
    images.add_argument('--singularity', '-S', action='store_true', help='Use singularity image for computation')
    
    analyze_parser.set_defaults(func=analyze)

    args = parser.parse_args()
    args.parser = parser
    config = load_config(arguments=args)
    args.func(args, config)

def show_help(args, config):
    args.parser.print_help()

def info(args, config):
    show_analyses(config)

def analyze(args, config):
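    """Validate the input, build the execution description, download the databases, run the analysis and clean up."""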
    if not os.path.isfile(args.fasta):
        print("Given FASTA file does not exist: " + args.fasta, file=sys.stderr)
        sys.exit(1)

    execution = generate_execution(config, args)
    if args.debug:
        print(json.dumps(execution, indent=2))
    download_databases(execution)
    setup_execution_directory(execution)
    error_code = execute_analysis(execution)
    cleanup(execution)
    sys.exit(error_code)

def cleanup(execution):
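    """Remove the execution directory unless it has to be kept (debug mode or a user-supplied directory)."""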
    if not execution['debug'] and not execution['user_directory']:
        shutil.rmtree(execution['directory'])

def generate_execution(config, args):
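    """Assemble the execution dictionary that drives the pipeline from the parsed arguments and the configuration."""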
    execution = {}
    execution['debug'] = args.debug
    execution['user_directory'] = args.execution_dir is not None
    execution['use_cluster'] = args.use_cluster
    execution['fetch_informations'] = args.fetch_informations
    execution['mode'] = 'live' if args.live else 'complete'
    execution['fasta'] = os.path.abspath(args.fasta)
    execution['output'] = os.path.abspath(args.output)
    execution['install_path'] = config['install_path']
    execution['helpers_path'] = config['helpers_path']
    execution['database_path'] = config['databases']['localpath']
    execution['docker'] = args.docker
    execution['singularity'] = args.singularity
    if 'venv' in config:
        execution['venv'] = config['venv']
    if args.execution_dir:
        execution['directory'] = os.path.abspath(args.execution_dir)
    else:
        if args.use_cluster:
            execution['directory'] = tempfile.mkdtemp(dir='/vol/sge-tmp')
        else:
            execution['directory'] = tempfile.mkdtemp()
    execution['modules'] = generate_execution_modules_for_profile(config, args.profile)
    return execution
    
def generate_execution_modules_for_profile(config, profile):
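    """Look up the named profile and return a deep copy of its modules with ids and precomputed parameter strings."""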
    # find profile by name
    profiles = [x for x in config['profiles'] if x['name'] == profile]
    if not profiles:
        raise Exception("Profile not found: " + profile)
    p = profiles[0]
    modules = copy.deepcopy(p['modules'])
    # generate unique ids for each module
    for module in modules:
        module['id'] = module['name']
        module['analysis']['params'] = generate_params_string(module['analysis']['parameters'])
        module['converter']['params'] = generate_params_string(module['converter']['parameters'])
    return modules

def generate_params_string(options):
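    """Render an options dict as a parameter string: "--key 'value'" for entries with a value, a bare "--key" otherwise."""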
    params = ''
    if options:
        parts = []
        for k in options:
            if options[k]:
                parts.append('--' + k + " '" + options[k] + "'")
            else:
                parts.append('--' + k)
        params = ' '.join(parts)
    return params


def show_analyses(config):
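    """Print the configured profiles with their modules, followed by the modules available for a custom profile."""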
    print('Profiles:')
    for profile in config['profiles']:
        print('   {0:<20} - {1}'.format(profile['name'], profile['info']))
        if 'modules' in profile:
            for module in profile['modules']:
                print('      {0:<20}'.format(module['name']))

    print()
    print('Available modules for custom profile:')
    for key in config['modules']:
        module = config['modules'][key]
        print('   {0:<20} - {1}'.format(module['name'], module['info']))

def download_databases(execution):
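    """Run the helper script that downloads the databases from the S3 bucket and return its exit code."""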
    
    script = os.path.join(execution['helpers_path'], 'download_databases_from_s3bucket.sh')
    logfile = os.path.join(execution['install_path'], 'database_paths.log')
    database_path = execution['database_path']

    retcode = 1
    try:
        run_download_script = subprocess.run([script, logfile, database_path])
        retcode = run_download_script.returncode
    except OSError as e:
        print("Execution failed:", e, file=sys.stderr)
    return retcode

if __name__ == "__main__":
    main()