Commit fe053765 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Add pfam download

parent 5638d6f2
...@@ -164,6 +164,7 @@ def run_in_tempdir(func=None, success=None, fail=None): ...@@ -164,6 +164,7 @@ def run_in_tempdir(func=None, success=None, fail=None):
def _recipes(args): def _recipes(args):
recipes = { recipes = {
'pfam': {'download': {'script': _pkgres('recipes/download_pfam.py')}},
'card': {'download': {'script': _pkgres('recipes/download_card.py')}, 'card': {'download': {'script': _pkgres('recipes/download_card.py')},
'blast': {'script': _pkgres('recipes/create_blast_dbs.py')}, 'blast': {'script': _pkgres('recipes/create_blast_dbs.py')},
'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')}, 'diamond': {'script': _pkgres('recipes/create_diamond_dbs.py')},
......
#!/usr/bin/env python3
import dbman.helper as h
import json
import os
import gzip
import logging
# Configure the root logger so INFO-level (and more severe) messages are emitted.
logging.basicConfig(level=logging.INFO)
# Name and description of the database; both are passed to h.create_metadata
# at the end of this script.
dbname = "pfam"
dbdescription = "The Pfam database is a large collection of protein families"
# Base URL of the current Pfam release; individual file names are appended to
# it for every download below.
url = "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release"
def read_version_info(version_fn):
    """Parse a gzip-compressed Pfam version file into a dictionary.

    Every line is expected to have the form ``key : value``. Keys and values
    are stripped of surrounding whitespace. The well-known Pfam keys are
    renamed (``Pfam release`` -> ``version``, ``Pfam-A families`` ->
    ``families``, ``Date`` -> ``release_date``, ``Based on UniProtKB`` ->
    ``based_on_uniprotkb_version``); any other key is kept unchanged.

    Lines without a colon (for example blank lines) are skipped, and only the
    first colon separates key from value, so values may contain colons.

    Args:
        version_fn: Path to the gzip-compressed version file.

    Returns:
        dict mapping the (possibly renamed) keys to their string values.
    """
    renamed_keys = {
        "Pfam release": "version",
        "Pfam-A families": "families",
        "Date": "release_date",
        "Based on UniProtKB": "based_on_uniprotkb_version",
    }
    version_data = {}
    with gzip.open(version_fn, 'rt') as f:
        for line in f:
            # partition() never raises: it splits on the first colon only and
            # reports an empty separator when the line has no colon at all.
            key, separator, value = line.partition(':')
            if not separator:
                continue
            key = key.strip()
            version_data[renamed_keys.get(key, key)] = value.strip()
    return version_data
# Download the version file of the release and parse it; the resulting dict
# is the basis of the metadata written at the end.
version_fn = h.download(url + '/Pfam.version.gz')
version_info = read_version_info(version_fn)

# Download the HMM archive and the accompanying .dat file.
hmm_fn = h.download(url + "/Pfam-A.hmm.gz")
flatfile_fn = h.download(url + "/Pfam-A.hmm.dat.gz")

# Pair every downloaded file with the tags describing its content.
tagged_files = (
    (hmm_fn, ['profile', 'hmm', 'gzip', 'compressed']),
    (flatfile_fn, ['flatfile', 'annotation', 'gzip', 'compressed']),
    (version_fn, ['version_info', 'gzip', 'compressed']),
)
parts = [{'files': [filename], 'tags': tags} for filename, tags in tagged_files]
version_info['parts'] = parts

# Write the metadata file for the "download" recipe.
h.create_metadata(dbname, "download", dbdescription, other=version_info)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment