Commit e6f821cc authored by Rudel Fankep's avatar Rudel Fankep
Browse files

argument fasta

parent dd9ddf79
Verzeichnisstruktur beim prepare
dbname+_+tool. Keine Unterverzeichnisse: Skripte wären sonst komplizierter. Man müsste bei
Operationen in jedes Verzeichnis hinein- und wieder herausgehen.
......@@ -6,5 +6,4 @@ TOOL_FILE=$2
# Build a BLAST protein database from $TOOL_FILE inside $TOOL_DIR,
# then remove the source FASTA file.
# Guard the cd: without it a failed cd would run makeblastdb/rm in the
# caller's working directory.
cd "$TOOL_DIR" || exit 1
makeblastdb -dbtype prot -in "$TOOL_FILE"
rm "$TOOL_FILE"
cd -
\ No newline at end of file
#!/bin/bash
# Remove every file whose name starts with the database name from a
# local directory.
#   $1 = database name (file prefix to delete)
#   $2 = directory holding the database files
database=$1
file=$2
# Guard the cd: if it failed, the glob rm below would delete files in
# the caller's working directory instead.
cd "$file" || exit 1
rm "$database"*
cd -
\ No newline at end of file
#!/bin/bash
# Delete one object from the S3 store.
#   $1 = full s3:// path of the file to delete
WEB_FILE=$1
s3cmd del "$WEB_FILE"
\ No newline at end of file
#!/bin/bash
# Fetch a single object from S3 into a local directory.
#   $1 = s3:// source path
#   $2 = local target directory
TARGET_FILE=$1
TARGET_DIR=$2
s3cmd get "$TARGET_FILE" "$TARGET_DIR"
# Download a tarball from S3 into a local directory, unpack it there,
# and remove the tarball afterwards.
#   $1 = s3:// source path
#   $2 = local target directory
#   $3 = tarball file name (as it arrives locally)
TARGET_FILE=$1
TARGET_DIR=$2
FILE_NAME=$3
# Guard the cd so the download/extract never happens in the wrong place.
cd "$TARGET_DIR" || exit 1
s3cmd get "$TARGET_FILE"
tar -xzvf "$FILE_NAME"
rm "$FILE_NAME"
cd -
\ No newline at end of file
......@@ -4,5 +4,4 @@ TOOL_DIR=$1
TOOL_FILE=$2
# Build a GHOSTX database from $TOOL_FILE inside $TOOL_DIR, then drop
# the source file. Guard the cd so rm never runs in the caller's cwd.
cd "$TOOL_DIR" || exit 1
ghostx db -i "$TOOL_FILE" -o ghostx_db
rm "$TOOL_FILE"
cd -
\ No newline at end of file
#!/bin/bash
# Prepare a Pfam HMM file for hmmscan by pressing it with hmmpress.
#   $1 = directory containing the HMM file
#   $2 = HMM file name
TOOL_DIR=$1
TOOL_FILE=$2
cd "$TOOL_DIR" || exit 1
hmmpress "$TOOL_FILE"
cd -
\ No newline at end of file
#!/home/theia/conda/bin/python
import argparse
import os
import shutil
import subprocess
......@@ -10,7 +11,7 @@ data = {'swissprot': {'prepare': './prepare_swissprot.sh',
},
'pfam': {'prepare': './prepare_pfam.sh',
'tool': {'hmmer': './hmmer_pfam.sh'},
'filename':''
'filename':'Pfam-A.hmm'
},
'card': {'prepare':'./prepare_card.sh',
'tool':{'blast':'./blast_db.sh','ghostx':'./ghostx_db.sh'},
......@@ -26,26 +27,37 @@ def myparser():
prepare_parser = subparsers.add_parser('prepare', help='Databank download from the Website, extraction and transformation for another Programm')
prepare_parser.add_argument('database', help='Database which have to be prepared', type=str)
prepare_parser.add_argument('tool',help='Programm/Tool for the post analysis',type=str,)
prepare_parser.add_argument('-d', '--directory', dest='dir', help='target directory for the output.Default will be a created("output") folder in the working directory', type=str)
prepare_parser.add_argument('tool', help='Programm/Tool for the post analysis',type=str,)
prepare_parser.add_argument('-d', '--directory', dest='dir', help='set the local directory to save/to get the data.', type=str)
prepare_parser.set_defaults(func=prepare)
upload_parser = subparsers.add_parser('upload', help='Databank copy from the local directory to the web storage')
upload_parser.add_argument('database', help='database to be transfered from the local directory', type=str)
upload_parser.add_argument('tool',type=str,help='database type which have to be transfered')
upload_parser.add_argument('-d', '--directory', dest='dir', help='Target directory for the file transfer into "S3". Default will be "s3://db_storage"', type=str)
upload_parser.add_argument('tool', type=str, help='database type which have to be transfered')
upload_parser.add_argument('-s', '--s3store', dest='store', help='set the "S3" storage of the data ')
upload_parser.add_argument('-d', '--directory', dest='dir', help='set the local directory to save/to get the data.', type=str)
upload_parser.add_argument('-r', '--raw', help='Raw data need to be upload', action='store_true')
upload_parser.set_defaults(func=upload)
download_parser = subparsers.add_parser('download', help='Datenbank copy from the web storage to the working computer')
download_parser.add_argument('file', help='File to be transfered from "S3"', type=str)
download_parser.add_argument('-d', '--directory', dest='dir', help='Target directory for the file transfer. Default will be a created folder("output) in the current working directory',type=str)
download_parser.add_argument('database', help='database to be transfered from "S3"', type=str)
download_parser.add_argument('tool', type=str, help='database type which have to be transfered')
download_parser.add_argument('-s', '--s3store', dest='store', help='set the "S3" storage of the data ')
download_parser.add_argument('-d', '--directory', dest='dir', help='set the local directory to save/to get the data.',type=str)
download_parser.add_argument('-r', '--raw', help='Raw data need to be download', action='store_true')
download_parser.set_defaults(func=download)
delete_parser = subparsers.add_parser('delete', help='delete existing files from local directory or from "S3"')
delete_parser.add_argument('database', help = 'database which have to be delete')
delete_parser.add_argument('place', choices = ['local','S3'], help = 'defined the place where the database have to be delete')
delete_parser.add_argument('-d', '--directory', dest='dir', help='directory where the database have to be delete')
delete_parser.add_argument('database', help='database which have to be delete')
delete_parser.add_argument('tool', type=str, help='database type which have to be deleted')
delete_parser.add_argument('place', choices=['local', 'S3'], help='defined the place where the database have to be delete')
delete_parser.add_argument('-d', '--directory', dest='dir', help='set the local directory to save/to get the data.',type=str)
delete_parser.add_argument('-s', '--s3store', dest='store', help='set the "S3" storage of the data ')
delete_parser.add_argument('-r', '--raw', help='Raw data need to be deleted', action='store_true')
delete_parser.set_defaults(func=delete)
list_recipes_parser = subparsers.add_parser('list_recipes', help='print databases with the possible Tool')
list_recipes_parser.set_defaults(func=list_recipes)
return parser.parse_args()
......@@ -58,14 +70,18 @@ def targetdir_maker(args):
return databasedir
def get_tool_dir(dbdir, db, tool):
    """Return the path of the '<db>_<tool>' directory inside *dbdir*."""
    return path_maker(dbdir, '{}_{}'.format(db, tool))
def s3dir_maker(args):
    """Resolve the S3 storage location for database tarballs.

    Precedence: explicit ``--s3store`` option, then the ``S3_DBDIR``
    environment variable, then the built-in default bucket.
    (The diff residue still carried the old ``args.dir`` branch; the
    current option is ``args.store``.)
    """
    web_dir = 's3://db_storage'
    if args.store:
        web_dir = args.store
    elif 'S3_DBDIR' in os.environ:
        web_dir = os.environ['S3_DBDIR']
    return web_dir
# paste the target directory with the filename.(will be important to test if the file is already there. "/" or not)
def path_maker(directory, file):
......@@ -81,66 +97,150 @@ def prepare(args):
if args.database in data.keys():
dbman_dir = targetdir_maker(args)
os.environ['PREPARE_DATA'] = dbman_dir
raw_dir = path_maker(dbman_dir, args.database+'_raw')
if not os.path.isdir(raw_dir):
os.mkdir(raw_dir)
subprocess.run([data[args.database]['prepare'], raw_dir])
print("The {} file is in: ".format(args.database) + raw_dir)
print("The {} file is in: ".format(args.database) + os.path.abspath(raw_dir))
else:
print('The {} file already exists in: '.format(args.database) + raw_dir)
print('The {} file already exists in: '.format(args.database) + os.path.abspath(raw_dir))
if args.tool in data[args.database]['tool'].keys():
tool_dir=path_maker(dbman_dir,args.database+'_'+args.tool)
tool_file=path_maker(raw_dir,data[args.database]['filename'])
tool_dir = get_tool_dir(dbman_dir, args.database, args.tool)
tool_file = path_maker(raw_dir, data[args.database]['filename'])
tool_dir_file = path_maker(tool_dir, data[args.database]['filename'])
if not os.path.isdir(tool_dir):
os.mkdir(tool_dir)
shutil.copy(tool_file,tool_dir)
os.symlink(os.path.relpath(os.path.abspath(tool_file), tool_dir), tool_dir_file)
subprocess.run([data[args.database]['tool'][args.tool], tool_dir, data[args.database]['filename']])
print('The {} files are in: '.format(args.tool) + tool_dir)
print('The {} files are in: '.format(args.tool) + os.path.abspath(tool_dir))
os.unlink(tool_dir_file)
else:
print('The {} files are already exists in: '.format(args.tool) + tool_dir)
print('The {} files are already exists in: '.format(args.tool) + os.path.abspath(tool_dir))
else:
print('Tool error. Verify your input(tool) or/and check the possibility in the help')
print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
else:
print('Database error. Verify your input(database) or/and check the possibility in the help')
return os.environ['PREPARE_DATA']
print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))
def upload(args):
    """Pack a prepared database/tool directory and push it to S3.

    Skips the transfer when the tarball is already listed in the store;
    with ``--raw`` the raw download directory is uploaded as well.
    (Reconstructed from interleaved old/new diff lines toward the new
    version: the old PREPARE_DATA/env based path was removed.)
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys():
            upload_dir = s3dir_maker(args)
            dbman_dir = targetdir_maker(args)
            # 's3cmd la' lists every object; membership test below tells
            # us whether the tarball already exists in the store.
            s3_list = subprocess.run(['s3cmd', 'la'], capture_output=True, text=True).stdout.split()
            s3_dir = get_tool_dir(upload_dir, args.database, args.tool+'.tar.gz')
            if s3_dir not in s3_list:
                tool_dir = get_tool_dir(dbman_dir, args.database, args.tool)
                upload_data = args.database+'_'+args.tool
                upload_data_name = '{}'.format(args.database+'_'+args.tool+'.tar.gz')
                if os.path.isdir(tool_dir):
                    subprocess.run(['./upload_db.sh', dbman_dir, upload_data_name, upload_data, upload_dir])
                else:
                    print('There is no {} data to upload in {}. Prepare the database first'.format(args.database+'_'+args.tool, os.path.abspath(dbman_dir)))
            else:
                print('The {} files are already in {}'.format(args.database+'_'+args.tool, upload_dir))
            if args.raw:
                s3_raw_dir = get_tool_dir(upload_dir, args.database, 'raw.tar.gz')
                if s3_raw_dir not in s3_list:
                    local_raw_dir = get_tool_dir(dbman_dir, args.database, 'raw')
                    raw_data = args.database+'_raw'
                    tarraw_data_name = args.database+'_raw.tar.gz'
                    if os.path.isdir(local_raw_dir):
                        subprocess.run(['./upload_db.sh', dbman_dir, tarraw_data_name, raw_data, upload_dir])
                    else:
                        print('There is no {} data to upload in {}. Prepare the database first'.format(args.database+'_raw', os.path.abspath(dbman_dir)))
                else:
                    print('The {} files are already in {}'.format(args.database+'_raw', upload_dir))
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))
def download(args):
    """Fetch a database/tool tarball from S3 into the local directory.

    Skips the transfer when the tool directory already exists locally;
    with ``--raw`` the raw tarball is fetched as well.
    (Reconstructed from the diff: the old one-line './download_db.sh'
    call with ``args.file`` was replaced by database/tool arguments.)
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys():
            dbman_dir = targetdir_maker(args)
            tool_dir = get_tool_dir(dbman_dir, args.database, args.tool)
            if not os.path.isdir(tool_dir):
                s3_list = subprocess.run(['s3cmd', 'la'], capture_output=True, text=True).stdout.split()
                download_data_name = args.database+'_'+args.tool+'.tar.gz'
                download_dir = s3dir_maker(args)
                download_file = path_maker(download_dir, download_data_name)
                if download_file in s3_list:
                    subprocess.run(['./download_db.sh', download_file, dbman_dir, download_data_name])
                else:
                    print('There is no {} files in {}:'.format(args.database+'_'+args.tool, download_dir))
            else:
                print('{} is already in the local directory {}'.format(args.database+'_'+args.tool, os.path.abspath(dbman_dir)))
            if args.raw:
                local_raw_dir = get_tool_dir(dbman_dir, args.database, 'raw')
                if not os.path.isdir(local_raw_dir):
                    download_dir = s3dir_maker(args)
                    s3_raw_dir = get_tool_dir(download_dir, args.database, 'raw.tar.gz')
                    s3_list = subprocess.run(['s3cmd', 'la'], capture_output=True, text=True).stdout.split()
                    tarraw_data_name = args.database+'_raw.tar.gz'
                    if s3_raw_dir in s3_list:
                        subprocess.run(['./download_db.sh', s3_raw_dir, dbman_dir, tarraw_data_name])
                    else:
                        print('There is no {} files in {}:'.format(args.database+'_raw', download_dir))
                else:
                    print('{} is already in the local directory {}'.format(args.database+'_raw', os.path.abspath(dbman_dir)))
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))
def delete(args):
    """Delete a prepared database either locally or from the S3 store.

    ``args.place`` selects the location ('local' removes the tool
    directory via shutil, 'S3' calls ./delete_db.sh on the tarball);
    ``--raw`` additionally deletes the raw data.
    (Reconstructed from the diff toward the new version; the old
    filedir_maker/testfile branch and the German placeholder else were
    removed lines.)
    """
    if args.database in data.keys():
        if args.tool in data[args.database]['tool'].keys():
            if args.place == 'local':
                dbman_dir = targetdir_maker(args)
                del_dir = path_maker(dbman_dir, args.database+'_'+args.tool)
                if os.path.isdir(del_dir):
                    shutil.rmtree(del_dir)
                    print("The {} files were successfully delete from: ".format(args.database+'_'+args.tool) + os.path.abspath(dbman_dir))
                else:
                    print("The {} files aren't existing in: ".format(args.database+'_'+args.tool) + os.path.abspath(dbman_dir))
                if args.raw:
                    del_rawdir = path_maker(dbman_dir, args.database+'_raw')
                    if os.path.isdir(del_rawdir):
                        shutil.rmtree(del_rawdir)
                        print("The {} files were successfully delete from: ".format(args.database+'_raw') + os.path.abspath(dbman_dir))
                    else:
                        print("The {} files aren't existing in: ".format(args.database+'_raw') + os.path.abspath(dbman_dir))
            elif args.place == 'S3':
                web_store = s3dir_maker(args)
                s3_list = subprocess.run(['s3cmd', 'la'], capture_output=True, text=True).stdout.split()
                web_file = path_maker(web_store, args.database+'_'+args.tool+'.tar.gz')
                if web_file in s3_list:
                    subprocess.run(['./delete_db.sh', web_file])
                    print("The {} files were successfully delete from: ".format(args.database+'_'+args.tool) + web_store)
                else:
                    print("The {} files aren't existing in: ".format(args.database+'_'+args.tool) + web_store)
                if args.raw:
                    web_rawfile = path_maker(web_store, args.database+'_raw.tar.gz')
                    if web_rawfile in s3_list:
                        subprocess.run(['./delete_db.sh', web_rawfile])
                        print("The {} files were successfully delete from: ".format(args.database+'_raw') + web_store)
                    else:
                        print("The {} files aren't existing in: ".format(args.database+'_raw') + web_store)
        else:
            print('Tool error. There are following possibility: {}'.format([tool for tool in data[args.database]['tool'].keys()]))
    else:
        print('Database error. There are following possibility: {}'.format([database for database in data.keys()]))
def list_recipes(args):
    """Print every known database together with its supported tools."""
    for db_name, recipe in data.items():
        print('{}:{}'.format(db_name, list(recipe['tool'].keys())))
def main():
if not os.path.isdir('output'):
......
......@@ -4,4 +4,5 @@ DOWNLOAD_DIR=$1
# Download and unpack the CARD broadstreet archive into $DOWNLOAD_DIR.
cd "$DOWNLOAD_DIR" || exit 1
wget https://card.mcmaster.ca/download/0/broadstreet-v3.0.3.tar.gz
# NOTE(review): original used 'tar xjf' (bzip2) on a .tar.gz name;
# 'xf' lets GNU tar auto-detect the compression either way.
tar xf broadstreet-v3.0.3.tar.gz
rm broadstreet-v3.0.3.tar.gz
cd -
\ No newline at end of file
......@@ -93,3 +93,4 @@ Database repository---> https://github.com/MGX-metagenomics/databases/blob/maste
anzeigen lassen---> echo $VARIABLE
löschen---> unset VARIABLE
setzen---> export VARIABLE=pfad
#!/bin/bash
# Pack a database directory into a tarball and upload it to S3.
# (Reconstructed toward the new version of the diff; the old
# DATA_DB/TARGET_FILE variable set was the removed side.)
#   $1 = local directory that contains the database folder
#   $2 = name of the tarball to create
#   $3 = folder (inside $1) to pack
#   $4 = S3 destination
DATA_DIR=$1
TARGET_DIR_NAME=$2
TARGET_DIR=$3
UPLOAD_DIR=$4
cd "$DATA_DIR" || exit 1
tar -czvf "$TARGET_DIR_NAME" "$TARGET_DIR"
s3cmd put "$TARGET_DIR_NAME" "$UPLOAD_DIR"
rm "$TARGET_DIR_NAME"
cd -
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment