main.py 19.8 KB
Newer Older
Rudel Fankep's avatar
Rudel Fankep committed
1
2
3
4
5
6
#!/home/theia/conda/bin/python

import argparse
import subprocess
import os
import shutil
Rudel Fankep's avatar
Rudel Fankep committed
7
8
9
10
11
import re
import wget
from datetime import date
import tarfile
import json
Rudel Fankep's avatar
Rudel Fankep committed
12
13
14


def myparser():
    """Build the CLI and parse ``sys.argv``.

    Every subcommand installs its handler as ``args.func`` via
    ``set_defaults``.  If the user gives no subcommand at all, argparse
    (Python 3) leaves ``func`` unset and ``main`` would crash with
    AttributeError — so we exit with the usage message instead.

    Returns:
        argparse.Namespace: the parsed arguments, with ``func`` set.
    """
    parser = argparse.ArgumentParser(description='Download, convert and share databases to working computer', prog='Database Manager')

    subparsers = parser.add_subparsers(title='Subcommands', description='Authorized Subcommands', help='Additional help')

    prepare_parser = subparsers.add_parser('prepare', help='Databank download from the Website, extraction and transformation for another Programm')
    prepare_parser.add_argument('database', help='Database which have to be prepared', type=str)
    prepare_parser.add_argument('tool', help='Programm/Tool for the post analysis', type=str)
    prepare_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    prepare_parser.add_argument('-v', '--version', help='version of the needed database. Standard will be the current release')
    prepare_parser.set_defaults(func=prepare)

    upload_parser = subparsers.add_parser('upload', help='Databank copy from the local directory to the web storage')
    upload_parser.add_argument('database', help='database to be transfered from the local directory', type=str)
    upload_parser.add_argument('tool', type=str, help='database type which have to be transfered."raw" instead of tool, if raw files are needed to be processed')
    upload_parser.add_argument('-s', '--s3store', dest='store', help='change the "S3" storage of the data ')
    upload_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    upload_parser.add_argument('-v', '--version', help='version of the needed database. Standard will be the current release')
    upload_parser.set_defaults(func=upload)

    download_parser = subparsers.add_parser('download', help='Datenbank copy from the web storage to the working computer')
    download_parser.add_argument('database', help='database to be transfered from "S3"', type=str)
    download_parser.add_argument('tool', type=str, help='database type which have to be transfered. "raw" instead of tool, if raw files are needed to be processed')
    download_parser.add_argument('-s', '--s3store', dest='store', help='change the "S3" storage of the data ')
    download_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    download_parser.add_argument('-v', '--version', help='version of the needed database. Standard will be the current release')
    download_parser.set_defaults(func=download)

    delete_parser = subparsers.add_parser('delete', help='delete existing files from local directory or from "S3"')
    delete_parser.add_argument('database', help='database which have to be delete')
    delete_parser.add_argument('tool', type=str, help='database type which have to be deleted."raw" instead of tool, if raw files are needed to be processed')
    delete_parser.add_argument('place', choices=['local', 's3'], help='defined the place where the database have to be delete')
    delete_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    delete_parser.add_argument('-s', '--s3store', dest='store', help='change the "S3" storage of the data ')
    delete_parser.add_argument('-v', '--version', help='version of the needed database. Standard will be the current release')
    delete_parser.set_defaults(func=delete)

    list_local_databases_parser = subparsers.add_parser('list_local_databases', help='print the list of local databases with some features')
    list_local_databases_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    list_local_databases_parser.set_defaults(func=list_local_databases)

    list_remote_databases_parser = subparsers.add_parser('list_remote_databases', help='print the list of remote databases with some features')
    list_remote_databases_parser.add_argument('-d', '--directory', dest='dir', help='change the local directory to save/to get the data.', type=str)
    list_remote_databases_parser.add_argument('-s', '--s3store', dest='store', help='change the "S3" storage of the data ')
    list_remote_databases_parser.set_defaults(func=list_remote_databases)

    list_recipes_parser = subparsers.add_parser('list_recipes', help='print databases with the possible Tool/s')
    list_recipes_parser.set_defaults(func=list_recipes)

    args = parser.parse_args()
    # Python 3 does not require a subcommand by default; bail out with the
    # usage message rather than letting main() hit AttributeError on func.
    if not hasattr(args, 'func'):
        parser.error('a subcommand is required (see --help)')
    return args


Rudel Fankep's avatar
Rudel Fankep committed
65
66
def get_local_databases_directory(args):
    """Resolve the local database directory.

    Priority: the ``-d/--directory`` option, then the ``DBMAN_DBDIR``
    environment variable, finally ``./local_databases``.
    """
    if args.dir:
        return os.path.abspath(args.dir)
    if "DBMAN_DBDIR" in os.environ:
        return os.environ["DBMAN_DBDIR"]
    return os.path.abspath("local_databases")
Rudel Fankep's avatar
Rudel Fankep committed
72
73


Rudel Fankep's avatar
Rudel Fankep committed
74
def get_remote_databases_directory(args):
    """Resolve the remote ("S3") storage location.

    Priority: the ``-s/--s3store`` option, then the ``S3_DBDIR``
    environment variable, finally the default bucket ``s3://db_storage``.
    """
    if args.store:
        return args.store
    if 'S3_DBDIR' in os.environ:
        return os.environ['S3_DBDIR']
    return 's3://db_storage'


Rudel Fankep's avatar
Rudel Fankep committed
83
84
85
86
87
88
89
90
91
92
93
94
def get_raw_directory_name(args, version):
    """Directory name for the unprocessed download: ``<database>_<version>_raw``."""
    return f'{args.database}_{version}_raw'


def get_tool_directory_name(args, version):
    """Directory name for a converted database: ``<database>_<version>_<tool>``."""
    return f'{args.database}_{version}_{args.tool}'


def get_path_tool_directory(args, version):
    """Full local path of the ``<database>_<version>_<tool>`` directory."""
    base_directory = get_local_databases_directory(args)
    return path_maker(base_directory, get_tool_directory_name(args, version))


Rudel Fankep's avatar
Rudel Fankep committed
95
96
97
98
99
100
101
102
103
def path_maker(directory, file):
    """Join *directory* and *file* with exactly one '/' between them.

    Used for plain filesystem paths as well as ``s3://`` URLs, which is
    why ``os.path.join`` is deliberately not used here.
    """
    separator = '' if directory[-1] == '/' else '/'
    return directory + separator + file


Rudel Fankep's avatar
Rudel Fankep committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def get_version(args):
    """Determine which database version to work with.

    Priority: the explicit ``-v/--version`` flag, then the recipe's
    version-lookup callback, finally today's date as a fallback.
    """
    if args.version:
        return args.version
    version_lookup = data[args.database]['version']
    if version_lookup:
        return version_lookup()
    return str(date.today())


def get_swissprot_version():
    """Fetch the current Swiss-Prot release string (e.g. ``2019_05``).

    Downloads UniProt's release-notes file, scans it for the line that
    mentions Swiss-Prot and extracts the ``YYYY_MM`` release token.
    """
    release_file = wget.download("ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt")
    with open(release_file) as handle:
        line = ''
        # scan forward until the line announcing the Swiss-Prot release
        while 'Swiss-Prot' not in line:
            line = handle.readline()
    version = re.search('[0-9]{4}_[0-9]{2}', line).group(0)
    os.remove(release_file)
    return version


def get_pfam_version():
    """Fetch the current Pfam release string (e.g. ``32.0``).

    Downloads Pfam's release notes, scans for the RELEASE line and
    extracts the ``NN.N`` version token.
    """
    release_file = wget.download("ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/relnotes.txt")
    with open(release_file) as handle:
        line = ''
        # scan forward until the RELEASE header line
        while 'RELEASE' not in line:
            line = handle.readline()
    version = re.search(r'[0-9]{2}\.[0-9]', line).group(0)
    os.remove(release_file)
    return version


def get_card_version():
    """Return the current CARD release version.

    Downloads the latest CARD archive into a scratch ``version``
    sub-directory, reads ``_version`` from the bundled ``card.json`` and
    removes the scratch directory again.

    Fixes over the previous revision:
      * try/finally guarantees the working directory is restored and the
        scratch directory deleted even when the download or extraction
        fails (before, a failure left the process chdir'd into ./version).
      * the tarfile is opened with a context manager so it is always closed.
      * the local json payload no longer shadows the module-level ``data``
        recipe table.
    """
    os.mkdir('version')
    os.chdir('version')
    try:
        wget.download("https://card.mcmaster.ca/latest/data")
        # the CARD download is always published as card-data.tar.bz2
        with tarfile.open('card-data.tar.bz2') as tar:
            tar.extractall()
        with open("card.json") as f:
            card_metadata = json.load(f)
        version = card_metadata["_version"]
    finally:
        os.chdir('..')
        shutil.rmtree('version')
    return version


def get_local_json_version(args):
    """Version of the newest matching entry in the local metadata file.

    Returns the ``-v/--version`` value when given; otherwise looks up the
    database/tool pair in ``dbman_metadata.json`` and returns the version
    of the most recently created match, or ``'error'`` when none exists.
    """
    if args.version:
        return args.version
    with open(get_path_local_json(args)) as f:
        metadata = json.load(f)
    matches = [entry for entry in metadata
               if entry['name'] == args.database and entry['tool'] == args.tool]
    if not matches:
        return 'error'
    # ties on 'created' resolve to the first match, same as a stable
    # reverse sort followed by [0]
    newest = max(matches, key=lambda entry: entry['created'])
    return newest['version']
Rudel Fankep's avatar
Rudel Fankep committed
171
172


Rudel Fankep's avatar
Rudel Fankep committed
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def get_remote_json_version(args):
    """Version of the newest matching entry in the remote metadata.

    Same lookup as :func:`get_local_json_version` but against the remote
    metadata file; returns ``'error'`` when the remote metadata is missing
    or contains no entry for this database/tool pair.
    """
    if args.version:
        return args.version
    remote_metadata = get_remote_metadata(args)
    matches = [entry for entry in remote_metadata
               if entry['name'] == args.database and entry['tool'] == args.tool]
    if not matches:
        return 'error'
    newest = max(matches, key=lambda entry: entry['created'])
    return newest['version']


def get_path_local_json(args):
    """Full path of the local metadata file ``dbman_metadata.json``."""
    local_directory = get_local_databases_directory(args)
    return path_maker(local_directory, 'dbman_metadata.json')


def get_path_remote_json(args):
    """Full remote path of the metadata file ``dbman_remote_metadata.json``."""
    remote_directory = get_remote_databases_directory(args)
    return path_maker(remote_directory, 'dbman_remote_metadata.json')
Rudel Fankep's avatar
Rudel Fankep committed
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219


def get_local_metadata(args):
    """Load the local metadata list, creating an empty file when absent."""
    json_path = get_path_local_json(args)
    if os.path.isfile(json_path):
        with open(json_path) as f:
            return json.load(f)
    # first run: materialise an empty metadata file so later saves work
    with open(json_path, 'w') as f:
        json.dump([], f)
    return []


def save_local_metadata(args, metaliste):
    """Overwrite the local metadata file with *metaliste*."""
    json_path = get_path_local_json(args)
    with open(json_path, 'w') as f:
        json.dump(metaliste, f)


def get_remote_metadata(args):
    """Fetch and return the remote metadata list (empty when none exists).

    When the remote metadata file is present on the store, it is pulled
    into the local database directory via ``download_json.sh``, parsed,
    and the temporary local copy removed again.
    """
    if get_path_remote_json(args) not in get_remote_files():
        return []
    local_directory = get_local_databases_directory(args)
    subprocess.run(['./download_json.sh', get_path_remote_json(args), local_directory])
    temporary_copy = path_maker(local_directory, 'dbman_remote_metadata.json')
    with open(temporary_copy) as f:
        metadata = json.load(f)
    os.remove(temporary_copy)
    return metadata


def save_remote_metadata(args, metaliste):
    """Write *metaliste* to a temporary local json file, upload it, clean up."""
    local_directory = get_local_databases_directory(args)
    temporary_copy = path_maker(local_directory, 'dbman_remote_metadata.json')
    with open(temporary_copy, 'w') as f:
        json.dump(metaliste, f)
    subprocess.run(['./upload_json.sh', local_directory, 'dbman_remote_metadata.json', get_remote_databases_directory(args)])
    os.remove(temporary_copy)
Rudel Fankep's avatar
Rudel Fankep committed
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266


def get_remote_filename(args, version):
    """Full remote path of the tar archive for this database/tool/version."""
    archive_name = get_tar_filename(args, version)
    return path_maker(get_remote_databases_directory(args), archive_name)


def get_remote_files():
    """List every token of ``s3cmd la`` output (includes all remote file URLs)."""
    listing = subprocess.run(['s3cmd', 'la'], capture_output=True, text=True)
    return listing.stdout.split()


def get_tar_filename(args, version):
    """Archive name: ``<database>_<version>_<tool>.tar.gz``."""
    return '{}.tar.gz'.format(get_tool_directory_name(args, version))


def create_tar_file_and_upload(args, version):
    """Tar the tool directory and push the archive to the remote store (upload_db.sh)."""
    command = ['./upload_db.sh',
               get_local_databases_directory(args),
               get_tar_filename(args, version),
               get_tool_directory_name(args, version),
               get_remote_databases_directory(args)]
    subprocess.run(command)


def prepare(args):
    """Download the raw files of ``args.database`` and convert them for ``args.tool``.

    Step 1: if ``<database>_<version>_raw`` does not exist locally, run the
    recipe's prepare script into it and record a 'raw' metadata entry.
    Step 2: if the requested tool is known for this database, create the
    tool directory, symlink the raw main file into it, run the conversion
    script and record a tool metadata entry.

    Fix over the previous revision: ``os.makedirs`` instead of ``os.mkdir``
    so a custom ``-d`` directory that does not exist yet is created on the
    fly instead of raising FileNotFoundError.
    """
    if args.database in data.keys():
        version = get_version(args)
        print('')
        raw_directory_path = path_maker(get_local_databases_directory(args), get_raw_directory_name(args, version))
        if not os.path.isdir(raw_directory_path):
            # makedirs: also creates a missing parent database directory
            os.makedirs(raw_directory_path)
            subprocess.run([data[args.database]['prepare'], raw_directory_path])
            metadata = get_local_metadata(args)
            details = {'name': args.database, 'tool': 'raw', 'version': version, 'created': str(date.today())}
            metadata.append(details)
            save_local_metadata(args, metadata)
            print("The {} file is in: ".format(args.database) + os.path.abspath(raw_directory_path))
        else:
            print('The {} file already exists in: '.format(args.database) + os.path.abspath(raw_directory_path))
        if args.tool in data[args.database]['tool'].keys():
            tool_dir = get_path_tool_directory(args, version)
            tool_file = path_maker(raw_directory_path, data[args.database]['filename'])
            tool_dir_file = path_maker(tool_dir, data[args.database]['filename'])
            if not os.path.isdir(tool_dir):
                os.makedirs(tool_dir)
                # relative symlink so the conversion script sees the raw
                # main file inside its own working directory
                os.symlink(os.path.relpath(os.path.abspath(tool_file), tool_dir), tool_dir_file)
                subprocess.run([data[args.database]['tool'][args.tool], tool_dir, data[args.database]['filename']])
                print('The {} files are in: '.format(args.tool) + os.path.abspath(tool_dir))
                # the symlink was only needed during conversion
                os.unlink(tool_dir_file)
                metadata = get_local_metadata(args)
                details = {'name': args.database, 'tool': args.tool, 'version': version, 'created': str(date.today())}
                metadata.append(details)
                save_local_metadata(args, metadata)
            else:
                print('The {} files are already exists in: '.format(args.tool) + os.path.abspath(tool_dir))
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))
Rudel Fankep's avatar
Rudel Fankep committed
288
289
290
291


def upload(args):
    """Push a locally prepared database archive to the remote ("S3") store.

    Looks up the local version for the database/tool pair, uploads the
    tarred directory via ``upload_db.sh`` unless it is already remote, and
    merges the matching local metadata entry into the remote metadata.
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys() or args.tool == "raw":
            version = get_local_json_version(args)
            # 'error' when the data to upload was not found local
            if version != 'error':
                if get_remote_filename(args, version) not in get_remote_files():
                    create_tar_file_and_upload(args, version)
                    remote_metadata = get_remote_metadata(args)
                    # copy the matching local metadata entry to the remote side
                    for dictionnary in get_local_metadata(args):
                        if dictionnary['name'] == args.database and dictionnary['tool'] == args.tool and dictionnary['version'] == version:
                            remote_metadata.append(dictionnary)
                    save_remote_metadata(args, remote_metadata)
                else:
                    print('The {} files are already in {}'.format(get_tool_directory_name(args, version), get_remote_databases_directory(args)))
            else:
                print('There is no {} data to upload in {}. Prepare the database first'.format(args.database+' '+args.tool, os.path.abspath(get_local_databases_directory(args))))
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))


def download(args):
    """Pull a database archive from the remote ("S3") store to this machine.

    Looks up the remote version for the database/tool pair, downloads and
    unpacks the archive via ``download_db.sh`` unless the tool directory
    already exists locally, and copies the matching remote metadata entry
    into the local metadata file.
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys() or args.tool == "raw":
            version = get_remote_json_version(args)
            # 'error' when no matching file exists on the remote store
            if version != 'error':
                if not os.path.isdir(get_path_tool_directory(args, version)):
                    download_file = path_maker(get_remote_databases_directory(args), get_tar_filename(args, version))
                    subprocess.run(['./download_db.sh', download_file, get_local_databases_directory(args), get_tar_filename(args, version)])
                    local_metadata = get_local_metadata(args)
                    # copy the matching remote metadata entry to the local side
                    for dictionnary in get_remote_metadata(args):
                        if dictionnary['name'] == args.database and dictionnary['tool'] == args.tool and dictionnary['version'] == version:
                            local_metadata.append(dictionnary)
                    save_local_metadata(args, local_metadata)
                else:
                    print('{} is already in the local directory {}'.format(get_tool_directory_name(args, version), os.path.abspath(get_local_databases_directory(args))))
            else:
                print('There is no {} files to download in {}:'.format(args.database+' '+args.tool, get_remote_databases_directory(args)))
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))


def delete(args):
    """Delete a database's files either locally or on the remote store.

    ``args.place`` selects the side: 'local' removes the tool directory
    and its metadata entry; 's3' removes the remote archive (via
    ``delete_remote_file.sh``) and its remote metadata entry.  Only the
    first metadata entry matching name/tool/version is removed.
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys() or args.tool == "raw":
            if args.place == 'local':
                version = get_local_json_version(args)
                dbman_dir = get_local_databases_directory(args)
                directory_to_delete = path_maker(dbman_dir, get_tool_directory_name(args, version))
                if os.path.isdir(directory_to_delete):
                    shutil.rmtree(directory_to_delete)
                    metadata = get_local_metadata(args)
                    # drop the first matching metadata entry; break keeps the
                    # index-based deletion safe
                    for position in range(0, len(metadata)):
                        if metadata[position]['name'] == args.database and metadata[position]['tool'] == args.tool and metadata[position]['version'] == version:
                            del metadata[position]
                            break
                    save_local_metadata(args, metadata)
                    print("The {} files were successfully delete from: ".format(get_tool_directory_name(args, version)) + os.path.abspath(dbman_dir))
                else:
                    print("The {} files aren't existing in: ".format(get_tool_directory_name(args, version)) + os.path.abspath(dbman_dir))
            elif args.place == 's3':
                version = get_remote_json_version(args)
                web_store = get_remote_databases_directory(args)
                web_file = path_maker(web_store, get_tar_filename(args, version))
                if web_file in get_remote_files():
                    subprocess.run(['./delete_remote_file.sh', web_file])
                    metadata = get_remote_metadata(args)
                    # drop the first matching remote metadata entry
                    for position in range(0, len(metadata)):
                        if metadata[position]['name'] == args.database and metadata[position]['tool'] == args.tool and metadata[position]['version'] == version:
                            del metadata[position]
                            break
                    save_remote_metadata(args, metadata)
                    print("The {} files were successfully delete from: ".format(get_tool_directory_name(args, version)) + web_store)
                else:
                    print("The {} files aren't existing in: ".format(get_tool_directory_name(args, version)) + web_store)
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))


def list_recipes(args):
    """Print every known database together with its available tools."""
    for database, recipe in data.items():
        print('{}:{}'.format(database, list(recipe['tool'].keys())))


def list_local_databases(args):
    """Print name, tool, version and creation date of every local database."""
    for entry in get_local_metadata(args):
        print('\n{}[{}] Version: {} erstellt am: {}'.format(entry['name'], entry['tool'], entry['version'], entry['created']))


def list_remote_databases(args):
    """Print name, tool, version and creation date of every remote database."""
    for entry in get_remote_metadata(args):
        print('\n{}[{}] Version: {} erstellt am: {}'.format(entry['name'], entry['tool'], entry['version'], entry['created']))


# Recipe table: one entry per supported database.
#   'prepare'  - shell script that downloads/extracts the raw files
#   'tool'     - mapping of tool name -> conversion script
#   'filename' - main file inside the raw directory that the tools consume
#   'version'  - callable returning the current upstream release string
data = {'swissprot': {'prepare': './prepare_swissprot.sh',
                    'tool': {'blast': './blast_db.sh', 'ghostx': './ghostx_db.sh'},
                    'filename': 'uniprot_sprot.fasta',
                    'version': get_swissprot_version
                    },
            'pfam': {'prepare': './prepare_pfam.sh',
                    'tool': {'hmmer': './hmmer_pfam.sh'},
                    'filename': 'Pfam-A.hmm',
                    'version': get_pfam_version
                    },
            'card': {'prepare': './prepare_card.sh',
                    'tool': {'blast': './blast_db.sh', 'ghostx': './ghostx_db.sh'},
                    'filename': 'protein_fasta_protein_homolog_model.fasta',
                    'version': get_card_version
                    }
    }

Rudel Fankep's avatar
Rudel Fankep committed
410
411

def main():
    """Entry point: ensure the default database directory exists, then dispatch.

    Fixes over the previous revision:
      * ``os.makedirs(..., exist_ok=True)`` removes the isdir/mkdir race.
      * a missing subcommand no longer crashes with AttributeError on
        ``args.func``; a hint is printed instead.
    """
    os.makedirs('local_databases', exist_ok=True)
    args = myparser()
    func = getattr(args, 'func', None)
    if func is None:
        # argparse leaves 'func' unset when no subcommand was given
        print('No subcommand given. Use --help to list the available subcommands.')
    else:
        func(args)


Rudel Fankep's avatar
Rudel Fankep committed
419
420
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()