Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dbman
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SOaAS
dbman
Commits
2b71cd2d
Commit
2b71cd2d
authored
4 years ago
by
Lukas Jelonek
Browse files
Options
Downloads
Patches
Plain Diff
Add uniprotkb download\nadd parameters to external processes\nChange default directory name to dbs
parent
9eb4fd32
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
dbman/helper.py
+14
-3
14 additions, 3 deletions
dbman/helper.py
dbman/main.py
+13
-6
13 additions, 6 deletions
dbman/main.py
dbman/recipes/download_uniprotkb.py
+72
-0
72 additions, 0 deletions
dbman/recipes/download_uniprotkb.py
with
99 additions
and
9 deletions
dbman/helper.py
+
14
−
3
View file @
2b71cd2d
import
wget
import
wget
import
tarfile
import
tarfile
import
gzip
import
shutil
import
re
import
json
import
json
import
datetime
import
datetime
import
logging
import
logging
...
@@ -12,14 +15,21 @@ def download(url):
...
@@ -12,14 +15,21 @@ def download(url):
def
extract
(
filename
):
def
extract
(
filename
):
logging
.
info
(
"
Extracting
"
+
filename
)
logging
.
info
(
"
Extracting
"
+
filename
)
with
tarfile
.
open
(
filename
,
'
r
'
)
as
tar
:
if
'
.tar.
'
in
filename
:
tar
.
extractall
()
with
tarfile
.
open
(
filename
,
'
r
'
)
as
tar
:
tar
.
extractall
()
elif
filename
.
endswith
(
"
.gz
"
):
with
gzip
.
open
(
filename
,
'
rb
'
)
as
gin
,
open
(
re
.
sub
(
"
\.gz$
"
,
""
,
filename
),
'
wb
'
)
as
bout
:
shutil
.
copyfileobj
(
gin
,
bout
)
else
:
raise
Exception
(
"
Compression not supported
"
)
def
create_metadata
(
name
,
def
create_metadata
(
name
,
tool
,
tool
,
description
=
None
,
description
=
None
,
creation_date
=
datetime
.
datetime
.
now
().
isoformat
(),
version
=
datetime
.
date
.
today
().
isoformat
(),
version
=
datetime
.
date
.
today
().
isoformat
(),
other
=
{},
creation_date
=
datetime
.
datetime
.
now
().
isoformat
(),
):
):
metadata
=
{
metadata
=
{
'
name
'
:
name
,
'
name
'
:
name
,
...
@@ -28,6 +38,7 @@ def create_metadata(name,
...
@@ -28,6 +38,7 @@ def create_metadata(name,
'
creation_date
'
:
creation_date
,
'
creation_date
'
:
creation_date
,
'
version
'
:
version
'
version
'
:
version
}
}
metadata
.
update
(
other
)
with
open
(
"
metadata.json
"
,
"
w
"
)
as
metadata_file
:
with
open
(
"
metadata.json
"
,
"
w
"
)
as
metadata_file
:
logging
.
info
(
"
Writing metadata
"
+
json
.
dumps
(
metadata
))
logging
.
info
(
"
Writing metadata
"
+
json
.
dumps
(
metadata
))
...
...
This diff is collapsed.
Click to expand it.
dbman/main.py
+
13
−
6
View file @
2b71cd2d
...
@@ -19,6 +19,8 @@ def main():
...
@@ -19,6 +19,8 @@ def main():
prepare_parser
.
add_argument
(
'
-d
'
,
'
--directory
'
,
dest
=
'
dbdir
'
,
help
=
'
Override the databases root directory
'
,
type
=
str
)
prepare_parser
.
add_argument
(
'
-d
'
,
'
--directory
'
,
dest
=
'
dbdir
'
,
help
=
'
Override the databases root directory
'
,
type
=
str
)
prepare_parser
.
add_argument
(
'
-v
'
,
'
--version
'
,
help
=
'
Override the version of the database
'
,
type
=
str
)
prepare_parser
.
add_argument
(
'
-v
'
,
'
--version
'
,
help
=
'
Override the version of the database
'
,
type
=
str
)
prepare_parser
.
add_argument
(
'
--force_download
'
,
help
=
'
Force download of the database. Will replace existing downloads
'
,
action
=
'
store_true
'
)
prepare_parser
.
add_argument
(
'
--force_download
'
,
help
=
'
Force download of the database. Will replace existing downloads
'
,
action
=
'
store_true
'
)
prepare_parser
.
add_argument
(
'
--keep_temp
'
,
help
=
'
Keep temporary data on failure
'
,
action
=
'
store_true
'
)
prepare_parser
.
set_defaults
(
func
=
prepare
)
prepare_parser
.
set_defaults
(
func
=
prepare
)
list_local_databases_parser
=
subparsers
.
add_parser
(
'
list_local_databases
'
,
help
=
'
List the locally available databases
'
)
list_local_databases_parser
=
subparsers
.
add_parser
(
'
list_local_databases
'
,
help
=
'
List the locally available databases
'
)
...
@@ -82,9 +84,9 @@ def prepare(args):
...
@@ -82,9 +84,9 @@ def prepare(args):
logging
.
info
(
"
Database
'
%s
'
not found. Downloading it.
"
,
args
.
database
)
logging
.
info
(
"
Database
'
%s
'
not found. Downloading it.
"
,
args
.
database
)
recipe
=
_get_recipe
(
_recipes
(
args
),
args
.
database
,
"
download
"
)
recipe
=
_get_recipe
(
_recipes
(
args
),
args
.
database
,
"
download
"
)
logging
.
debug
(
"
Found recipe: %s
"
,
recipe
)
logging
.
debug
(
"
Found recipe: %s
"
,
recipe
)
run_in_tempdir
(
func
=
lambda
:
_run_external_tool
(
recipe
[
'
script
'
]),
run_in_tempdir
(
func
=
lambda
:
_run_external_tool
(
recipe
[
'
script
'
]
,
recipe
[
'
params
'
]
if
'
params
'
in
recipe
else
None
),
success
=
_rename_directory_after_metadata
,
success
=
_rename_directory_after_metadata
,
fail
=
_delete_directory
)
fail
=
_delete_directory
if
not
args
.
keep_temp
else
lambda
x
:
print
()
)
# if not download compile the database
# if not download compile the database
...
@@ -114,13 +116,17 @@ def _rename_directory_after_metadata(path):
...
@@ -114,13 +116,17 @@ def _rename_directory_after_metadata(path):
logging
.
debug
(
"
Renaming
'
%s
'
to
'
%s
'"
,
oldpath
,
newpath
)
logging
.
debug
(
"
Renaming
'
%s
'
to
'
%s
'"
,
oldpath
,
newpath
)
oldpath
.
rename
(
newpath
)
oldpath
.
rename
(
newpath
)
def
_run_external_tool
(
path
):
def
_run_external_tool
(
path
,
params
=
[]
):
"""
Runs the external tool and captures stdout, stderr and exitcode in files
"""
"""
Runs the external tool and captures stdout, stderr and exitcode in files
"""
import
subprocess
import
subprocess
with
open
(
"
.stdout
"
,
"
w
"
)
as
out
:
with
open
(
"
.stdout
"
,
"
w
"
)
as
out
:
with
open
(
"
.stderr
"
,
"
w
"
)
as
err
:
with
open
(
"
.stderr
"
,
"
w
"
)
as
err
:
with
open
(
"
.exitcode
"
,
"
w
"
)
as
exit
:
with
open
(
"
.exitcode
"
,
"
w
"
)
as
exit
:
cp
=
subprocess
.
run
(
path
,
stderr
=
err
,
stdout
=
out
,
shell
=
True
)
if
params
:
path
=
[
path
]
path
.
extend
(
params
)
logging
.
debug
(
"
Executing
'
%s
'"
,
path
)
cp
=
subprocess
.
run
(
path
,
stderr
=
err
,
stdout
=
out
)
print
(
cp
.
returncode
,
file
=
exit
)
print
(
cp
.
returncode
,
file
=
exit
)
return
cp
.
returncode
return
cp
.
returncode
...
@@ -145,7 +151,8 @@ def run_in_tempdir(func=None, success=None, fail=None):
...
@@ -145,7 +151,8 @@ def run_in_tempdir(func=None, success=None, fail=None):
def
_recipes
(
args
):
def
_recipes
(
args
):
recipes
=
{
recipes
=
{
'
card
'
:
{
'
download
'
:
{
'
script
'
:
os
.
path
.
abspath
(
pkg_resources
.
resource_filename
(
__name__
,
'
recipes/download_card.py
'
))}}
'
card
'
:
{
'
download
'
:
{
'
script
'
:
os
.
path
.
abspath
(
pkg_resources
.
resource_filename
(
__name__
,
'
recipes/download_card.py
'
))}},
'
swissprot
'
:
{
'
download
'
:
{
'
script
'
:
os
.
path
.
abspath
(
pkg_resources
.
resource_filename
(
__name__
,
'
recipes/download_uniprotkb.py
'
)),
'
params
'
:
[
'
--database
'
,
'
swissprot
'
,
'
--type
'
,
'
fasta
'
]}}
}
}
return
recipes
return
recipes
...
@@ -213,7 +220,7 @@ def _databases_dir(args=None):
...
@@ -213,7 +220,7 @@ def _databases_dir(args=None):
elif
"
DBMAN_DBDIR
"
in
os
.
environ
:
elif
"
DBMAN_DBDIR
"
in
os
.
environ
:
return
os
.
environ
[
"
DBMAN_DBDIR
"
]
return
os
.
environ
[
"
DBMAN_DBDIR
"
]
else
:
else
:
return
"
local_database
s
"
return
"
db
s
"
if
__name__
==
'
__main__
'
:
if
__name__
==
'
__main__
'
:
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
...
...
This diff is collapsed.
Click to expand it.
dbman/recipes/download_uniprotkb.py
0 → 100755
+
72
−
0
View file @
2b71cd2d
#!/usr/bin/env python3
import
dbman.helper
as
h
import
re
import
argparse
import
logging
logging
.
basicConfig
(
level
=
logging
.
INFO
)
parser
=
argparse
.
ArgumentParser
(
description
=
'
Download UniProtKB-database files
'
)
parser
.
add_argument
(
'
-d
'
,
'
--database
'
,
required
=
True
,
help
=
'
Database, one of [swissprot, trembl]
'
)
parser
.
add_argument
(
'
-t
'
,
'
--type
'
,
required
=
True
,
help
=
'
Database type, one of [xml, fasta, flatfile]
'
)
args
=
parser
.
parse_args
()
url_prefix
=
"
ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/
"
data
=
{
'
swissprot
'
:
{
'
description
'
:
'
UniProt/Swiss-Prot
'
,
'
fileprefix
'
:
'
uniprot_sprot
'
,
'
types
'
:
{
'
xml
'
:
'
.xml.gz
'
,
'
fasta
'
:
'
.fasta.gz
'
,
'
flatfile
'
:
'
.dat.gz
'
}
},
'
trembl
'
:
{
'
description
'
:
'
UniProt/TrEMBL
'
,
'
fileprefix
'
:
'
uniprot_trembl
'
,
'
types
'
:
{
'
xml
'
:
'
.xml.gz
'
,
'
fasta
'
:
'
.fasta.gz
'
,
'
flatfile
'
:
'
.dat.gz
'
}
}
}
entry
=
data
[
args
.
database
]
dbname
=
args
.
database
+
'
_
'
+
args
.
type
dbdescription
=
"
UniProtKB/Swiss-Prot
"
url
=
url_prefix
+
entry
[
'
fileprefix
'
]
+
entry
[
'
types
'
][
args
.
type
]
version_url
=
url_prefix
+
"
reldate.txt
"
# the card version is either available on the homepage or inside the card.json file
# here we use the card.json file
def
extract_version_info
(
file
):
data
=
{}
with
open
(
file
)
as
f
:
for
l
in
f
:
search
=
re
.
search
(
entry
[
'
description
'
]
+
"
Release (\d+_\d+) of (.+)
"
,
l
)
if
search
:
data
[
'
version
'
]
=
search
.
group
(
1
)
data
[
'
release_date
'
]
=
search
.
group
(
2
)
return
data
# download archive
fn
=
h
.
download
(
url
)
ver_fn
=
h
.
download
(
version_url
)
# write metadata file
other
=
extract_version_info
(
ver_fn
)
other
[
'
parts
'
]
=
[
{
'
files
'
:
[
fn
],
'
tags
'
:
[
args
.
type
,
'
protein
'
,
'
compressed
'
,
'
gzip
'
]
},
{
'
files
'
:
[
ver_fn
],
'
tags
'
:
[
'
text
'
,
'
info
'
,
'
version
'
]
}
]
h
.
create_metadata
(
dbname
,
"
download
"
,
dbdescription
,
other
=
other
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment