Commit 32e2b0b9 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Add hmmer transformation step

parent fe053765
...@@ -15,14 +15,20 @@ def download(url): ...@@ -15,14 +15,20 @@ def download(url):
print("") # wget does not add a newline to its download bar print("") # wget does not add a newline to its download bar
return filename return filename
def extract(filename, target_dir=None):
    """Unpack a tar archive or a single gzip-compressed file.

    Args:
        filename: Path of the file to unpack. Names containing ``.tar.`` are
            handled as tar archives; other names ending in ``.gz`` are
            handled as single gzip-compressed files.
        target_dir: Optional directory to unpack into. When omitted, tar
            archives are unpacked into the current working directory and
            gzip files are written next to ``filename``.

    Returns:
        For tar archives, the directory the archive was unpacked into
        (``'.'`` when ``target_dir`` is not given). For gzip files, the path
        of the decompressed file.

    Raises:
        Exception: If ``filename`` matches neither supported format.
    """
    logging.info("Extracting " + filename)
    if '.tar.' in filename:
        # Honour target_dir for archives too, so both branches agree on
        # where the output ends up; the default stays the working directory.
        destination = target_dir or '.'
        with tarfile.open(filename, 'r') as tar:
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive - only use this on archives from trusted sources.
            tar.extractall(destination)
        return destination
    elif filename.endswith(".gz"):
        # Raw string: "\." in a plain literal is an invalid escape sequence.
        target_file = re.sub(r"\.gz$", "", filename)
        if target_dir:
            target_file = os.path.join(target_dir, os.path.basename(target_file))
        with gzip.open(filename, 'rb') as gin, open(target_file, 'wb') as bout:
            shutil.copyfileobj(gin, bout)
        return target_file
    else:
        raise Exception("Compression not supported")
......
...@@ -164,7 +164,9 @@ def run_in_tempdir(func=None, success=None, fail=None): ...@@ -164,7 +164,9 @@ def run_in_tempdir(func=None, success=None, fail=None):
def _recipes(args): def _recipes(args):
recipes = { recipes = {
'pfam': {'download': {'script': _pkgres('recipes/download_pfam.py')}}, 'pfam': {'download': {'script': _pkgres('recipes/download_pfam.py')},
'hmmer': {'script': _pkgres('recipes/create_hmmer_dbs.py')},
},
'card': {'download': {'script': _pkgres('recipes/download_card.py')}, 'card': {'download': {'script': _pkgres('recipes/download_card.py')},
'blast': {'script': _pkgres('recipes/create_blast_dbs.py')}, 'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')}, 'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
......
#!/usr/bin/env python
import dbman.helper as h
import subprocess
import sys
import os
import json
from pathlib import Path
# Build HMMER profile databases with hmmpress for every HMM profile part
# listed in the source metadata, then write metadata describing the result.
metadata = h.load_metadata('./.source_metadata.json')
hmm_profile_parts = [
    p for p in metadata['parts']
    if 'hmm' in p['tags'] and 'profile' in p['tags']
]
new_parts = []
for part in hmm_profile_parts:
    is_gzipped = 'gzip' in part['tags']

    # Temporarily uncompress for hmmpress, as it can't handle gzip files.
    if is_gzipped:
        files = [h.extract(file, './') for file in part['files']]
    else:
        files = list(part['files'])

    for file in files:
        # Pass an argument list instead of a shell string so file names
        # containing spaces or shell metacharacters are handled correctly.
        cp = subprocess.run(['hmmpress', file])
        if cp.returncode != 0:
            # Propagate hmmpress' own exit status.
            sys.exit(cp.returncode)

    # Remove the temporarily unzipped files again.
    if is_gzipped:
        for file in files:
            os.unlink(file)

    # Collect everything in the working directory that shares a stem with
    # one of the processed files. All files of the part are considered,
    # not just the last one; an empty part simply yields no files.
    prefixes = tuple(Path(file).stem + '.' for file in files)
    db_files = sorted(
        f.name for f in os.scandir('.')
        if f.name.startswith(prefixes) and not f.name.endswith('.')
    )

    # Carry over the source tags except the compression markers, which no
    # longer apply. Filtering (rather than list.remove) also works for
    # parts that were never tagged as compressed.
    tags = ['hmm_profile_db']
    tags.extend(t for t in part['tags'] if t not in ('gzip', 'compressed'))
    new_parts.append({'files': db_files, 'tags': tags})

h.create_metadata(
    metadata['name'],
    'hmmer',
    metadata['description'],
    metadata['version'],
    other={'parts': new_parts},
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment