Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SOaAS
dbman
Commits
2b71cd2d
Commit
2b71cd2d
authored
Jun 04, 2020
by
Lukas Jelonek
Browse files
Add uniprotkb download; add parameters to external processes; change default directory name to dbs
parent
9eb4fd32
Changes
3
Hide whitespace changes
Inline
Side-by-side
dbman/helper.py
View file @
2b71cd2d
import
wget
import
tarfile
import
gzip
import
shutil
import
re
import
json
import
datetime
import
logging
...
...
@@ -12,14 +15,21 @@ def download(url):
def extract(filename):
    """Extract a downloaded archive into the current working directory.

    Supports tar archives (any filename containing '.tar.') and single
    gzip-compressed files ending in '.gz'. For a plain '.gz' file the
    output is written next to the input with the '.gz' suffix stripped.

    Raises:
        Exception: if the filename matches no supported compression format.
    """
    logging.info("Extracting " + filename)
    if '.tar.' in filename:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # for untrusted downloads consider the 'filter' argument available
        # since Python 3.12.
        with tarfile.open(filename, 'r') as tar:
            tar.extractall()
    elif filename.endswith(".gz"):
        # Decompress in a streaming fashion; raw string fixes the former
        # invalid escape sequence "\.gz$" in the pattern.
        with gzip.open(filename, 'rb') as gin, \
                open(re.sub(r"\.gz$", "", filename), 'wb') as bout:
            shutil.copyfileobj(gin, bout)
    else:
        raise Exception("Compression not supported")
def
create_metadata
(
name
,
tool
,
description
=
None
,
creation_date
=
datetime
.
datetime
.
now
().
isoformat
(),
version
=
datetime
.
date
.
today
().
isoformat
(),
other
=
{},
creation_date
=
datetime
.
datetime
.
now
().
isoformat
(),
):
metadata
=
{
'name'
:
name
,
...
...
@@ -28,6 +38,7 @@ def create_metadata(name,
'creation_date'
:
creation_date
,
'version'
:
version
}
metadata
.
update
(
other
)
with
open
(
"metadata.json"
,
"w"
)
as
metadata_file
:
logging
.
info
(
"Writing metadata "
+
json
.
dumps
(
metadata
))
...
...
dbman/main.py
View file @
2b71cd2d
...
...
@@ -19,6 +19,8 @@ def main():
prepare_parser
.
add_argument
(
'-d'
,
'--directory'
,
dest
=
'dbdir'
,
help
=
'Override the databases root directory'
,
type
=
str
)
prepare_parser
.
add_argument
(
'-v'
,
'--version'
,
help
=
'Override the version of the database'
,
type
=
str
)
prepare_parser
.
add_argument
(
'--force_download'
,
help
=
'Force download of the database. Will replace existing downloads'
,
action
=
'store_true'
)
prepare_parser
.
add_argument
(
'--keep_temp'
,
help
=
'Keep temporary data on failure'
,
action
=
'store_true'
)
prepare_parser
.
set_defaults
(
func
=
prepare
)
list_local_databases_parser
=
subparsers
.
add_parser
(
'list_local_databases'
,
help
=
'List the locally available databases'
)
...
...
@@ -82,9 +84,9 @@ def prepare(args):
logging
.
info
(
"Database '%s' not found. Downloading it."
,
args
.
database
)
recipe
=
_get_recipe
(
_recipes
(
args
),
args
.
database
,
"download"
)
logging
.
debug
(
"Found recipe: %s"
,
recipe
)
run_in_tempdir
(
func
=
lambda
:
_run_external_tool
(
recipe
[
'script'
]),
run_in_tempdir
(
func
=
lambda
:
_run_external_tool
(
recipe
[
'script'
]
,
recipe
[
'params'
]
if
'params'
in
recipe
else
None
),
success
=
_rename_directory_after_metadata
,
fail
=
_delete_directory
)
fail
=
_delete_directory
if
not
args
.
keep_temp
else
lambda
x
:
print
()
)
# if not download compile the database
...
...
@@ -114,13 +116,17 @@ def _rename_directory_after_metadata(path):
logging
.
debug
(
"Renaming '%s' to '%s'"
,
oldpath
,
newpath
)
oldpath
.
rename
(
newpath
)
def
_run_external_tool
(
path
):
def
_run_external_tool
(
path
,
params
=
[]
):
"""Runs the external tool and captures stdout, stderr and exitcode in files"""
import
subprocess
with
open
(
".stdout"
,
"w"
)
as
out
:
with
open
(
".stderr"
,
"w"
)
as
err
:
with
open
(
".exitcode"
,
"w"
)
as
exit
:
cp
=
subprocess
.
run
(
path
,
stderr
=
err
,
stdout
=
out
,
shell
=
True
)
if
params
:
path
=
[
path
]
path
.
extend
(
params
)
logging
.
debug
(
"Executing '%s'"
,
path
)
cp
=
subprocess
.
run
(
path
,
stderr
=
err
,
stdout
=
out
)
print
(
cp
.
returncode
,
file
=
exit
)
return
cp
.
returncode
...
...
@@ -145,7 +151,8 @@ def run_in_tempdir(func=None, success=None, fail=None):
def _script_path(resource):
    """Absolute path of a recipe script bundled with this package."""
    return os.path.abspath(pkg_resources.resource_filename(__name__, resource))


def _recipes(args):
    """Return the registry of known database recipes.

    Each entry maps a database name to its available steps (currently
    'download'), where a step carries the 'script' to execute and optional
    'params' passed to it on the command line.

    Args:
        args: parsed command-line arguments (currently unused, kept for
            future recipe customization).
    """
    recipes = {
        'card': {
            'download': {
                'script': _script_path('recipes/download_card.py'),
            },
        },
        'swissprot': {
            'download': {
                'script': _script_path('recipes/download_uniprotkb.py'),
                'params': ['--database', 'swissprot', '--type', 'fasta'],
            },
        },
    }
    return recipes
...
...
@@ -213,7 +220,7 @@ def _databases_dir(args=None):
elif
"DBMAN_DBDIR"
in
os
.
environ
:
return
os
.
environ
[
"DBMAN_DBDIR"
]
else
:
return
"
local_database
s"
return
"
db
s"
if
__name__
==
'__main__'
:
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
...
...
dbman/recipes/download_uniprotkb.py
0 → 100755
View file @
2b71cd2d
#!/usr/bin/env python3
"""Recipe script: download UniProtKB database files (Swiss-Prot / TrEMBL)."""
import dbman.helper as h
import re
import argparse
import logging

logging.basicConfig(level=logging.INFO)

# Command-line interface of this recipe script.
parser = argparse.ArgumentParser(description='Download UniProtKB-database files')
parser.add_argument('-d', '--database', required=True,
                    help='Database, one of [swissprot, trembl]')
parser.add_argument('-t', '--type', required=True,
                    help='Database type, one of [xml, fasta, flatfile]')
args = parser.parse_args()
# Base URL of the UniProt "current release" knowledgebase downloads.
url_prefix = "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/"

# Per-database download metadata: human-readable description, file prefix
# on the UniProt FTP server, and the filename suffix for each dataset type.
data = {
    'swissprot': {
        'description': 'UniProt/Swiss-Prot',
        'fileprefix': 'uniprot_sprot',
        'types': {
            'xml': '.xml.gz',
            'fasta': '.fasta.gz',
            'flatfile': '.dat.gz'
        }
    },
    'trembl': {
        'description': 'UniProt/TrEMBL',
        'fileprefix': 'uniprot_trembl',
        'types': {
            'xml': '.xml.gz',
            'fasta': '.fasta.gz',
            'flatfile': '.dat.gz'
        }
    }
}

entry = data[args.database]
dbname = args.database + '_' + args.type
# Bug fix: the description used to be hard-coded to "UniProtKB/Swiss-Prot"
# even when '--database trembl' was selected; derive it from the entry.
dbdescription = entry['description']
url = url_prefix + entry['fileprefix'] + entry['types'][args.type]
version_url = url_prefix + "reldate.txt"
# UniProt publishes release information next to the archives in
# 'reldate.txt'; parse the release id and date for this database from it.
def extract_version_info(file, description=None):
    """Parse version and release date for a database from a reldate.txt file.

    Scans *file* line by line for a line of the form
    '<description> Release <YYYY_MM> of <date>'.

    Args:
        file: path of the downloaded reldate.txt file.
        description: database description to match; defaults to the
            module-level 'entry' record of the selected database.

    Returns:
        dict with 'version' and 'release_date' keys, empty if no line matched.
    """
    if description is None:
        description = entry['description']
    info = {}
    # re.escape guards against metacharacters in the description; the raw
    # string fixes the former invalid escape sequences in "\d+_\d+".
    pattern = re.compile(re.escape(description) + r" Release (\d+_\d+) of (.+)")
    with open(file) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                info['version'] = match.group(1)
                info['release_date'] = match.group(2)
    return info
# Fetch the database archive and the accompanying release-notes file.
archive_file = h.download(url)
release_file = h.download(version_url)

# Assemble extra metadata: parsed version info plus a listing of the
# downloaded parts with their tags, then persist it via the helper.
extra = extract_version_info(release_file)
extra['parts'] = [
    {
        'files': [archive_file],
        'tags': [args.type, 'protein', 'compressed', 'gzip']
    },
    {
        'files': [release_file],
        'tags': ['text', 'info', 'version']
    }
]
h.create_metadata(dbname, "download", dbdescription, other=extra)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment