Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SOaAS
dbxref
Commits
4da4423e
Commit
4da4423e
authored
Sep 14, 2017
by
Lukas Jelonek
Browse files
Each entry gets a status code. Introduced more status codes
parent
526d86ff
Changes
3
Hide whitespace changes
Inline
Side-by-side
dbxref/resolver.py
View file @
4da4423e
import
requests
import
requests
from
cachecontrol
import
CacheControl
from
cachecontrol.caches.file_cache
import
FileCache
import
logging
import
logging
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
from
dbxref.config
import
load_providers
from
dbxref.config
import
load_providers
providers
=
load_providers
()
providers
=
load_providers
()
FOUND
=
'FOUND'
cache
=
FileCache
(
".web_cache"
,
forever
=
True
)
NOT_FOUND
=
'NOT_FOUND'
sess
=
CacheControl
(
requests
.
Session
(),
cache
=
cache
)
UNSUPPORTED
=
'UNSUPPORTED'
TIMEOUT
=
'TIMEOUT'
STATUS_EXISTS
=
'found'
STATUS_NOT_EXISTS
=
'not found'
STATUS_UNKNOWN
=
'status unknown'
STATUS_NOT_CHECKED
=
'status not checked'
STATUS_CHECK_NOT_SUPPORTED
=
'check of status not supported'
STATUS_CHECK_TIMEOUT
=
'status check timed out'
STATUS_UNSUPPORTED_DB
=
'database unsupported'
def
resolve
(
strings
,
check_existence
=
True
):
def
resolve
(
strings
,
check_existence
=
True
):
results
=
[]
results
=
[]
for
s
in
strings
:
for
s
in
strings
:
exists
=
FOUN
D
status
=
STATUS_NOT_CHECKE
D
if
check_existence
:
if
check_existence
:
exist
s
=
check_dbxref_exists
(
s
)
statu
s
=
check_dbxref_exists
(
s
)
dbxref
=
convert_string_to_dbxref
(
s
)
dbxref
=
convert_string_to_dbxref
(
s
)
if
exists
==
FOUND
and
dbxref
[
'db'
]
in
providers
:
if
dbxref
[
'db'
]
in
providers
:
provider
=
providers
[
dbxref
[
'db'
]]
provider
=
providers
[
dbxref
[
'db'
]]
locations
=
{}
locations
=
{}
for
_type
in
provider
[
'resources'
]:
for
_type
in
provider
[
'resources'
]:
...
@@ -25,7 +33,9 @@ def resolve(strings, check_existence=True):
...
@@ -25,7 +33,9 @@ def resolve(strings, check_existence=True):
for
url_template
in
provider
[
'resources'
][
_type
]:
for
url_template
in
provider
[
'resources'
][
_type
]:
urls
.
append
(
compile_url
(
url_template
,
dbxref
))
urls
.
append
(
compile_url
(
url_template
,
dbxref
))
locations
[
_type
]
=
urls
locations
[
_type
]
=
urls
results
.
append
({
'dbxref'
:
dbxref
[
'db'
]
+
':'
+
dbxref
[
'id'
],
'locations'
:
locations
})
results
.
append
({
'dbxref'
:
dbxref
[
'db'
]
+
':'
+
dbxref
[
'id'
],
'locations'
:
locations
,
'status'
:
status
})
else
:
results
.
append
({
'dbxref'
:
dbxref
[
'db'
]
+
':'
+
dbxref
[
'id'
],
'status'
:
STATUS_UNSUPPORTED_DB
})
return
results
return
results
def
check_dbxref_exists
(
string
):
def
check_dbxref_exists
(
string
):
...
@@ -33,33 +43,33 @@ def check_dbxref_exists(string):
...
@@ -33,33 +43,33 @@ def check_dbxref_exists(string):
if
dbxref
[
'db'
]
in
providers
:
if
dbxref
[
'db'
]
in
providers
:
provider
=
providers
[
dbxref
[
'db'
]]
provider
=
providers
[
dbxref
[
'db'
]]
urls
=
[]
urls
=
[]
exists
=
FOUN
D
exists
=
STATUS_NOT_CHECKE
D
if
'check_existence'
in
provider
:
if
'check_existence'
in
provider
:
url
=
compile_url
(
provider
[
'check_existence'
],
dbxref
)
url
=
compile_url
(
provider
[
'check_existence'
],
dbxref
)
logger
.
debug
(
'Checking existence of dbxref at "%s"'
,
url
)
logger
.
debug
(
'Checking existence of dbxref at "%s"'
,
url
)
exists
=
check_url_exists
(
url
)
exists
=
check_url_exists
(
url
)
if
exists
==
NOT_FOUND
or
exists
==
TIMEOUT
:
logger
.
info
(
'The dbxref "%s" cannot be found. It will be ignored.'
,
string
)
return
exists
return
exists
else
:
else
:
return
UN
SUPPORTED
return
STATUS_CHECK_NOT_
SUPPORTED
return
UNSUPPORTED
return
STATUS_
UNSUPPORTED
_DB
def
compile_url
(
template
,
dbxref
):
def
compile_url
(
template
,
dbxref
):
return
template
.
replace
(
'%i'
,
dbxref
[
'id'
]).
replace
(
'%d'
,
dbxref
[
'db'
])
return
template
.
replace
(
'%i'
,
dbxref
[
'id'
]).
replace
(
'%d'
,
dbxref
[
'db'
])
def
check_url_exists
(
url
):
def
check_url_exists
(
url
):
try
:
try
:
r
=
requ
es
t
s
.
head
(
url
,
allow_redirects
=
True
,
timeout
=
1
)
r
=
s
ess
.
head
(
url
,
allow_redirects
=
True
,
timeout
=
1
)
r
.
close
()
r
.
close
()
if
r
.
status_code
<=
400
:
if
r
.
status_code
<=
400
:
return
FOUND
return
STATUS_EXISTS
else
:
else
:
logger
.
debug
(
'The server responded with status code: %s'
,
r
.
status_code
)
logger
.
debug
(
'The server responded with status code: %s'
,
r
.
status_code
)
return
NOT_FOUND
return
STATUS_NOT_EXISTS
except
requests
.
exceptions
.
Timeout
as
ex
:
except
requests
.
exceptions
.
Timeout
as
ex
:
logger
.
info
(
'Timeout for URL: "%s"'
,
url
)
logger
.
info
(
'Timeout for URL: "%s"'
,
url
)
return
TIMEOUT
return
STATUS_CHECK_TIMEOUT
except
:
return
STATUS_NOT_EXISTS
def
convert_string_to_dbxref
(
string
):
def
convert_string_to_dbxref
(
string
):
"""
"""
...
...
providers.yaml
View file @
4da4423e
...
@@ -74,7 +74,7 @@
...
@@ -74,7 +74,7 @@
resources
:
resources
:
html
:
[
"
http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i"
]
html
:
[
"
http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i"
]
xml
:
[
"
http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i&format=oboxml"
]
xml
:
[
"
http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i&format=oboxml"
]
check_existence
:
"
http://
www.ebi.ac.uk/QuickGO/GTerm?id=GO:%i&format=oboxml
"
check_existence
:
"
http://
purl.obolibrary.org/obo/GO_%i
"
-
name
:
HTTP
-
name
:
HTTP
prefixes
:
[
"
http"
,
"
https"
]
prefixes
:
[
"
http"
,
"
https"
]
resources
:
resources
:
...
...
tests/test_resolver.py
View file @
4da4423e
...
@@ -22,32 +22,32 @@ class TestDbxrefResolve(unittest.TestCase):
...
@@ -22,32 +22,32 @@ class TestDbxrefResolve(unittest.TestCase):
def
test_check_dbxref_exists
(
self
):
def
test_check_dbxref_exists
(
self
):
import
logging
import
logging
from
dbxref.resolver
import
FOUND
,
NOT_FOUND
,
UNSUPPORTED
from
dbxref.resolver
import
STATUS_EXISTS
,
STATUS_NOT_EXISTS
,
STATUS_UNSUPPORTED_DB
,
STATUS_UNKNOWN
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
logging
.
getLogger
().
setLevel
(
logging
.
WARNING
)
logging
.
getLogger
().
setLevel
(
logging
.
WARNING
)
resolver
.
logger
.
setLevel
(
logging
.
DEBUG
)
resolver
.
logger
.
setLevel
(
logging
.
DEBUG
)
data
=
[
data
=
[
# existent ids
# existent ids
(
'GO:0097281'
,
FOUND
),
(
'GO:0097281'
,
STATUS_EXISTS
),
(
'EC:1.1.1.1'
,
FOUND
),
(
'EC:1.1.1.1'
,
STATUS_EXISTS
),
(
'UniProtKB/Swiss-Prot:P12345'
,
FOUND
),
(
'UniProtKB/Swiss-Prot:P12345'
,
STATUS_EXISTS
),
(
'UniProtKB/TrEMBL:A2VB99'
,
FOUND
),
(
'UniProtKB/TrEMBL:A2VB99'
,
STATUS_EXISTS
),
(
'taxon:452271'
,
FOUND
),
(
'taxon:452271'
,
STATUS_EXISTS
),
(
'pubmed:19037750'
,
FOUND
),
(
'pubmed:19037750'
,
STATUS_EXISTS
),
(
'PDB:4AJY'
,
FOUND
),
(
'PDB:4AJY'
,
STATUS_EXISTS
),
(
'http://www.google.de'
,
FOUND
),
(
'http://www.google.de'
,
STATUS_EXISTS
),
(
'https://www.google.de'
,
FOUND
),
(
'https://www.google.de'
,
STATUS_EXISTS
),
# non existent ids
# non existent ids
(
'GO:123'
,
NOT_FOUND
),
(
'GO:123'
,
STATUS_NOT_EXISTS
),
(
'EC:hoho'
,
NOT_FOUND
),
(
'EC:hoho'
,
STATUS_NOT_EXISTS
),
(
'UniProtKB/Swiss-Prot:45'
,
NOT_FOUND
),
(
'UniProtKB/Swiss-Prot:45'
,
STATUS_NOT_EXISTS
),
(
'UniProtKB/TrEMBL:99'
,
NOT_FOUND
),
(
'UniProtKB/TrEMBL:99'
,
STATUS_NOT_EXISTS
),
(
'taxon:hoho'
,
NOT_FOUND
),
(
'taxon:hoho'
,
STATUS_NOT_EXISTS
),
(
'pubmed:hoho'
,
NOT_FOUND
),
(
'pubmed:hoho'
,
STATUS_NOT_EXISTS
),
(
'PDB:hoho'
,
NOT_FOUND
),
(
'PDB:hoho'
,
STATUS_NOT_EXISTS
),
(
'http://wurst'
,
NOT_FOUND
),
(
'http://wurst'
,
STATUS_NOT_EXISTS
),
(
'https://wurst'
,
NOT_FOUND
),
(
'https://wurst'
,
STATUS_NOT_EXISTS
),
# currently unsupported
# currently unsupported
#('GeneID:956582', FOUND),
#('GeneID:956582', FOUND),
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment