import argparse
import sys
import os
import json
import logging
import tempfile
import pathlib
import shutil

import pkg_resources
import prettytable

import dbman.helper as h

def main():
  logging.basicConfig(level=logging.DEBUG)
  parser = argparse.ArgumentParser(description='Prepare, compile and distribute biological databases', prog='dbman')
  subparsers = parser.add_subparsers(title='Subcommands', description='Available subcommands')

  prepare_parser = subparsers.add_parser('prepare', help='Download a raw database and compile it. See \'list_recipes\' for overview of available databases and tools')
  prepare_parser.add_argument('database', help='The database identifier', type=str)
  prepare_parser.add_argument('tool', help='The target tool', type=str)
  prepare_parser.add_argument('-d', '--directory', dest='dbdir', help='Override the databases root directory', type=str)
  prepare_parser.add_argument('-v', '--version', help='Override the version of the database', type=str)
  prepare_parser.add_argument('--force_download', help='Force download of the database. Will replace existing downloads', action='store_true')
  prepare_parser.add_argument('--force_rebuild', help='Force rebuild of the database. Will replace existing builds', action='store_true')
  prepare_parser.add_argument('--keep_temp', help='Keep temporary data on failure', action='store_true')

  prepare_parser.set_defaults(func=prepare)

  list_local_databases_parser = subparsers.add_parser('list_local_databases', help='List the locally available databases')
  list_local_databases_parser.add_argument('-d', '--directory', dest='dbdir', help='Override the databases root directory', type=str)
  list_local_databases_parser.set_defaults(func=list_local_databases)

  list_recipes_parser = subparsers.add_parser('list_recipes', help='Print the available databases and the tools they can be prepared for')
  list_recipes_parser.set_defaults(func=list_recipes)

  delete_parser = subparsers.add_parser('remove', help='Remove a local database.')
  delete_parser.add_argument('database', help='The database identifier')
  delete_parser.add_argument('tool', nargs='?', type=str, help='The target tool', default=None)
  delete_parser.add_argument('version', nargs='?', type=str, help='The version', default=None)
  delete_parser.add_argument('-f', '--force', action='store_true', help='Forces the removal. If omitted, nothing will be removed.')
  delete_parser.add_argument('-d', '--directory', dest='dbdir', help='Override the databases root directory', type=str)
  delete_parser.set_defaults(func=delete)

  if len(sys.argv) == 1:
    parser.print_help()
  else:
    args = parser.parse_args()
    args.parser = parser
    args.func(args)
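
# Example invocations (illustrative; the database and tool names must match
# the recipes defined in _recipes below, and the paths are placeholders):
#
#   dbman list_recipes
#   dbman prepare swissprot blast -d /data/dbs
#   dbman list_local_databases -d /data/dbs
#   dbman remove swissprot blast --force -d /data/dbs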

def delete(args):
  dbs = _getdbs(_local_databases(args), args.database, args.tool, args.version)
  if not dbs:
    print("No databases found according to the given criteria.")
    return
  if not args.force:
    print()
    print("      ************************************************************************************")
    print("      * Dry mode, nothing will be deleted. Use --force to actually delete the databases. *")
    print("      ************************************************************************************")
    print()
  for db in dbs:
    print("Deleting database '" + db['location'] + "'")
    if args.force:
      shutil.rmtree(db['location'])

def list_recipes(args):
  recipes = _recipes(args)
  table = prettytable.PrettyTable()
  table.field_names = ['name', 'tools']
  for recipe in recipes:
    table.add_row([recipe, ", ".join(recipes[recipe].keys())])
  print(table)

def list_local_databases(args):
  dbs = _local_databases(args)
  table = prettytable.PrettyTable()
  table.field_names = ['name', 'description', 'version', 'tool', 'location', 'created']
  for db in dbs:
    table.add_row([db['name'], db['description'], db['version'], db['tool'], db['location'], db['creation_date']])
  print(table)

def prepare(args):
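  '''Download the raw database if required, then transform it for the
     requested tool. Each step runs in a temporary directory that is only
     kept on success (or, on failure, with --keep_temp).'''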
  logging.debug("Local databases:\n   %s",  _localdatabases_string(args))
  download_recipe = _get_recipe(_recipes(args), args.database, "download")
  if not download_recipe:
    logging.warn("Recipe '%s - %s' does not exits", args.tool, "download")
    return
  transform_recipe = _get_recipe(_recipes(args), args.database, args.tool)
  if not transform_recipe:
    logging.warn("Recipe '%s - %s' does not exits", args.tool, args.tool)
    return
  db_info = _getdb(_local_databases(args), args.database, 'download')
  if not db_info or args.force_download:
    logging.info("Database '%s' not found or download forced. Downloading it.", args.database)
    db_info = run_in_tempdir(args,
                   func=lambda: h.run_external_tool(download_recipe['script'], download_recipe.get('params')),
                   success=_rename_directory_after_metadata,
                   fail=_delete_directory if not args.keep_temp else lambda path: None)
  else:
    logging.info("Database '%s - %s' found. Download not required.", args.database, db_info['version'])

  if not db_info:
    logging.error("Download of '%s' did not succeed. Aborting.", args.database)
    return

  if args.tool == 'download':
    return

  if not _exists(_local_databases(args), args.database, args.tool, db_info['version']) or args.force_rebuild:
    logging.info("Transforming '%s' for '%s'", args.database, args.tool)
    run_in_tempdir(args,
                   func=lambda: run_transform(db_info, transform_recipe),
                   success=_rename_directory_after_metadata,
                   fail=_delete_directory if not args.keep_temp else lambda path: None)
  else:
    logging.warning("Database for '%s - %s - %s' already exists. Skipping transformation.", args.database, args.tool, db_info['version'])

def run_transform(source_metadata, recipe):
  h.save_metadata(h.to_absolute_paths(source_metadata), "./.source_metadata.json")
  return h.run_external_tool(recipe['script'], recipe.get('params'))

def _get_recipe(recipes, database, tool):
  if database in recipes and tool in recipes[database]:
    return recipes[database][tool]
  return None

def _delete_directory(path):
  logging.debug("Removing directory %s", path)
  shutil.rmtree(path)

def _rename_directory_after_metadata(path):
  metadata_path = os.path.join(path, "metadata.json")
  if os.path.isfile(metadata_path):
    with open(metadata_path) as f:
      metadata = json.load(f)
    newname = metadata['name'] + '_' + metadata['version'] + '_' + metadata['tool']
    oldpath = pathlib.Path(path)
    newpath = oldpath.parent.parent.joinpath(newname)
    if newpath.exists():
      shutil.rmtree(str(newpath))
    logging.debug("Renaming '%s' to '%s'", oldpath, newpath)
    oldpath.rename(newpath)
    metadata['location'] = str(newpath.absolute())
    return metadata
  return None
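
# A recipe script is expected to leave a metadata.json in its working
# directory. A sketch of its shape, inferred from the fields read above and
# in list_local_databases (all values are illustrative placeholders):
#
#   {
#     "name": "swissprot",
#     "description": "UniProtKB/Swiss-Prot protein database",
#     "version": "2018_04",
#     "tool": "download",
#     "creation_date": "2018-05-07"
#   }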

def run_in_tempdir(args=None, func=None, success=None, fail=None):
  # honor a -d/--directory override so temporary builds land next to the
  # databases they will be renamed into
  dbtmpdir = os.path.join(_databases_dir(args), "tmp")
  os.makedirs(dbtmpdir, exist_ok=True)
  tmpdir = tempfile.mkdtemp(dir=dbtmpdir)
  olddir = os.getcwd()
  os.chdir(tmpdir)
  logging.debug("Starting external process in '%s'", os.getcwd())
  ret = func()
  os.chdir(olddir)
  if ret == 0:
    logging.debug("External process succeeded")
    return success(tmpdir)
  else:
    logging.debug("External process failed")
    fail(tmpdir)
    return None
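
# Usage sketch, mirroring the calls in prepare ('script' and 'params' stand
# in for a recipe's entries): func must return a process exit code; on 0 the
# success handler receives the temporary directory, otherwise the fail
# handler does.
#
#   metadata = run_in_tempdir(args,
#                             func=lambda: h.run_external_tool(script, params),
#                             success=_rename_directory_after_metadata,
#                             fail=_delete_directory)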

def _recipes(args):
  recipes = {
    'resfinder': {
      'download': {'script': _pkgres('recipes/download_resfinder.py')},
      'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
      'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
      'ghostx': {'script': _pkgres('recipes/create_ghostx_dbs.py')},
    },
    'pfam': {
      'download': {'script': _pkgres('recipes/download_pfam.py')},
      'hmmer': {'script': _pkgres('recipes/create_hmmer_dbs.py')},
    },
    'card': {
      'download': {'script': _pkgres('recipes/download_card.py')},
      'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
      'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
      'ghostx': {'script': _pkgres('recipes/create_ghostx_dbs.py')},
    },
    'swissprot': {
      'download': {'script': _pkgres('recipes/download_uniprotkb.py'), 'params': ['--database', 'swissprot', '--type', 'fasta']},
      'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
      'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
      'ghostx': {'script': _pkgres('recipes/create_ghostx_dbs.py')},
    },
    'trembl': {
      'download': {'script': _pkgres('recipes/download_uniprotkb.py'), 'params': ['--database', 'trembl', '--type', 'fasta']},
      'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
      'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
      'ghostx': {'script': _pkgres('recipes/create_ghostx_dbs.py')},
    }
  }
  return recipes
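
# Supporting a new database only requires another entry in the dict above,
# e.g. a hypothetical 'mydb' (the download script name is a placeholder):
#
#   'mydb': {
#     'download': {'script': _pkgres('recipes/download_mydb.py')},
#     'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
#   },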

def _pkgres(name):
  return os.path.abspath(pkg_resources.resource_filename(__name__, name))

def _getdb(databases, database, tool=None, version=None):
  dbs = _getdbs(databases, database, tool, version)
  return dbs[0] if dbs else None

def _getdbs(databases, database, tool=None, version=None):
  dbs = []
  for d in databases:
    if d['name'] == database:
      if not tool:
        dbs.append(d)
      else:
        if d['tool'] == tool:
          if not version:
            dbs.append(d)
          else:
            if d['version'] == version:
              dbs.append(d)
  return dbs
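
# e.g. _getdbs(dbs, 'card') matches every local build of 'card', while
# _getdbs(dbs, 'card', 'blast', '2.0.0') narrows it to an exact tool and
# version (the version string here is a placeholder).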


def _exists(databases, database, tool=None, version=None):
  '''Checks if the database/tool/version combination exists in the databases list.
     Fields that are None are ignored.
  '''
  return _getdb(databases, database, tool, version) is not None

def _local_databases(args):
  dbdir = _databases_dir(args)
  local_databases = []
  if os.path.isdir(dbdir):
    # find and read all metadata.json files in direct subdirectories of dbdir
    subdirectories = [f.path for f in os.scandir(dbdir) if f.is_dir() and os.path.isfile(os.path.join(f.path, "metadata.json"))]
    for directory in subdirectories:
      metadata_file = os.path.join(directory, "metadata.json")
      with open(metadata_file) as f:
        m = json.load(f)
        m['location'] = os.path.abspath(directory)
        local_databases.append(m)
  return local_databases

def _localdatabases_string(args=None):
  dbs_string = "\n   ".join(map(lambda x: ", ".join([x['name'], x['tool'], x['version']]), _local_databases(args)))
  return dbs_string

def _databases_dir(args=None):
  '''get the configured local databases directory'''
  if args and 'dbdir' in args and args.dbdir:
    return args.dbdir
  elif "DBMAN_DBDIR" in os.environ:
    return os.environ["DBMAN_DBDIR"]
  else:
    return "dbs"

if __name__ == '__main__':
  main()