Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SOaAS
dbxref
Commits
7943203b
Commit
7943203b
authored
Dec 11, 2017
by
lmueller
Browse files
created taxonomy retriever
parent
6de33f4d
Changes
2
Hide whitespace changes
Inline
Side-by-side
providers.yaml
View file @
7943203b
...
...
@@ -28,7 +28,11 @@
resources
:
html
:
[
"
http://www.uniprot.org/taxonomy/%i"
]
xml
:
[
"
http://www.uniprot.org/taxonomy/%i.rdf"
]
xml_ncbi
:
[
"
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=%i"
]
check_existence
:
"
http://www.uniprot.org/taxonomy/%i"
retriever
:
type
:
'
external'
location
:
'
scripts/retrieve_taxonomy.py'
-
name
:
SequenceOntology
prefixes
:
[
"
SO"
,
"
so"
]
resources
:
...
...
scripts/retrieve_taxonomy.py
0 → 100755
View file @
7943203b
#!/usr/bin/env python3
import
env
import
dbxref.config
import
dbxref.resolver
import
requests
import
xml.etree.ElementTree
as
ET
import
logging
import
json
import
argparse
# Module-level logger, named after this module so log records can be
# filtered per script.
logger = logging.getLogger(__name__)

# Prefix -> URI mapping for the NCBI E-utilities reply.
# NOTE(review): not referenced by any code visible in this file; kept in
# case other code relies on it -- confirm before removing.
ns = dict(TaxaSet='https://eutils.ncbi.nlm.nih.gov/')
def _extract_taxonomy(xml_text, output):
    """Parse an efetch taxonomy reply and record the findings in *output*.

    Args:
        xml_text: Body of the HTTP response, expected to be XML.
        output: Dict that already holds the ``'id'`` of the dbxref; it is
            updated in place.

    Returns:
        The same *output* dict, now carrying either ``'name'`` and
        ``'lineage'`` (taken from the ``Taxon`` elements) or a ``'message'``
        that explains why no taxonomy data could be extracted.
    """
    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError as err:
        # A non-XML body (e.g. an HTML error page) must not abort the run.
        output['message'] = 'Could not parse taxonomy XML: %s' % err
        return output

    error = root.find('ERROR')
    if error is not None:
        # An empty <ERROR/> element has text None; do not crash on it.
        output['message'] = (error.text or '').strip()
        return output

    for taxon in root.findall('Taxon'):
        # findtext() yields None when the element is missing and '' when it
        # is empty, so neither case raises AttributeError.
        name = taxon.findtext('ScientificName')
        if name is not None:
            output['name'] = name.strip()
        lineage = taxon.findtext('Lineage')
        output['lineage'] = lineage.strip() if lineage else 'No lineage found'

    if 'name' not in output and 'message' not in output:
        output['message'] = 'No entries found! Possibly invalid ID provided'
    return output


def main():
    """Resolve the dbxrefs given on the command line and print their taxonomy.

    For every resolved dbxref the first ``xml_ncbi`` location is downloaded,
    parsed, and turned into a dict with the keys ``id`` plus either
    ``name``/``lineage`` or ``message``. The list of all dicts is printed to
    stdout as a single JSON document.

    A failure for one dbxref (missing location, network error, unparsable
    reply) is reported in that entry's ``message`` instead of aborting the
    whole run.
    """
    parser = argparse.ArgumentParser(
        description='Retrieve taxonomy xml documents for dbxrefs and convert them into json')
    # NOTE(review): --basic is accepted but not consulted anywhere below, and
    # its help text is still a placeholder.
    parser.add_argument('--basic', '-b', action='store_true',
                        help='Include dbxref and ......................')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    documents = []
    for entry in resolved:
        output = {'id': entry['dbxref']}
        documents.append(output)

        try:
            xml_url = entry['locations']['xml_ncbi'][0]
        except (KeyError, IndexError):
            output['message'] = 'No xml_ncbi location available for this dbxref'
            continue
        logger.debug('URL: %s', xml_url)

        try:
            # Without a timeout a stalled server would block the script forever.
            r = requests.get(xml_url, timeout=30)
        except requests.RequestException as err:
            output['message'] = 'Could not retrieve taxonomy XML: %s' % err
            continue
        logger.debug('Content: %s', r.text)

        _extract_taxonomy(r.text, output)

    print(json.dumps(documents))
# Run only when executed as a script, so that importing this module does not
# trigger argument parsing and network requests as a side effect.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment