Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SOaAS
dbxref
Commits
d60fa151
Commit
d60fa151
authored
Dec 12, 2017
by
lmueller
Browse files
switched from ncbi to ebi for taxonomy retriever
parent
7943203b
Changes
3
Hide whitespace changes
Inline
Side-by-side
providers.yaml
View file @
d60fa151
...
...
@@ -27,6 +27,7 @@
prefixes
:
[
"
Taxon"
,
"
taxon"
,
"
taxid"
]
resources
:
html
:
[
"
http://www.uniprot.org/taxonomy/%i"
]
json
:
[
"
https://www.ebi.ac.uk/ena/data/taxonomy/v1/taxon/tax-id/%i"
]
xml
:
[
"
http://www.uniprot.org/taxonomy/%i.rdf"
]
xml_ncbi
:
[
"
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id=%i"
]
check_existence
:
"
http://www.uniprot.org/taxonomy/%i"
...
...
scripts/retrieve_gene_ontology.py
View file @
d60fa151
...
...
@@ -16,6 +16,9 @@ def main():
parser
.
add_argument
(
'--relations'
,
'-r'
,
action
=
'store_true'
,
help
=
'Include id, parents and children'
)
parser
.
add_argument
(
'dbxrefs'
,
nargs
=
argparse
.
REMAINDER
)
args
=
parser
.
parse_args
()
if
not
args
.
basic
and
not
args
.
relations
:
args
.
basic
=
True
args
.
relations
=
True
resolved
=
dbxref
.
resolver
.
resolve
(
args
.
dbxrefs
,
check_existence
=
False
)
documents
=
[]
for
entry
in
resolved
:
...
...
@@ -32,9 +35,6 @@ def main():
output
.
update
(
read_basic
(
d
))
if
args
.
relations
:
output
.
update
(
read_relations
(
d
))
if
not
args
.
basic
and
not
args
.
relations
:
output
.
update
(
read_basic
(
d
))
output
.
update
(
read_relations
(
d
))
documents
.
append
(
output
)
print
(
json
.
dumps
(
documents
))
...
...
scripts/retrieve_taxonomy.py
View file @
d60fa151
...
...
@@ -4,43 +4,62 @@ import env
import
dbxref.config
import
dbxref.resolver
import
requests
import
xml.etree.ElementTree
as
ET
import
logging
import
json
import
argparse
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# XML namespace prefix map for NCBI E-utilities responses.
# NOTE(review): not referenced by the visible code in this file -- confirm
# whether it is still needed.
ns = {
    'TaxaSet': 'https://eutils.ncbi.nlm.nih.gov/',
}
def main():
    """Resolve taxonomy dbxrefs, fetch their JSON records and print them.

    Command-line flags select which parts of each record are reported:
    ``--basic`` adds scientificName, commonName, lineage and rank;
    ``--geneticcodes`` adds geneticCode and mitochondrialGeneticCode.
    When neither flag is given, both are enabled.

    For every resolved entry the first ``json`` location is requested and
    the response body is parsed as JSON. An entry whose response is not a
    non-empty JSON object gets a ``message`` field instead of data. All
    collected documents are written to stdout as one JSON array.
    """
    parser = argparse.ArgumentParser(
        description='Retrieve taxonomy xml documents for dbxrefs and convert them into json')
    # Each option is registered exactly once: argparse raises an error when
    # the same option string is added twice.
    parser.add_argument(
        '--basic', '-b', action='store_true',
        help='Include dbxref, scientificName, commonName, lineage and rank')
    parser.add_argument(
        '--geneticcodes', '-g', action='store_true',
        help='Include geneticCode and mitochondrialGeneticCode')
    parser.add_argument('dbxrefs', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # No selection given means "report everything".
    if not args.basic and not args.geneticcodes:
        args.basic = True
        args.geneticcodes = True

    resolved = dbxref.resolver.resolve(args.dbxrefs, check_existence=False)
    documents = []
    for entry in resolved:
        json_url = entry['locations']['json'][0]
        logger.debug('URL: %s', json_url)
        r = requests.get(json_url)
        logger.debug('Content: %s', r.text)

        output = {'id': entry['dbxref']}
        try:
            d = json.loads(r.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError; a non-JSON body is
            # reported through the error message below.
            d = {}

        if isinstance(d, dict) and d:
            if args.basic:
                output.update(read_basic(d))
            if args.geneticcodes:
                output.update(read_geneticCodes(d))
        else:
            # Only flag an error when no usable record came back, so a
            # successful lookup is not labelled as a failure.
            output['message'] = "An error occurred! probably invalid ID"
        documents.append(output)

    print(json.dumps(documents))
def read_basic(d):
    """Return the basic taxonomy fields that are present in ``d``.

    Copies ``scientificName``, ``commonName``, ``lineage`` and ``rank``
    (in that order) from ``d`` into a new dict; keys missing from ``d``
    are left out of the result.
    """
    wanted = ('scientificName', 'commonName', 'lineage', 'rank')
    return {field: d[field] for field in wanted if field in d}
def read_geneticCodes(d):
    """Return the genetic-code fields of ``d`` nested under ``geneticCodes``.

    The result always has the single key ``geneticCodes`` whose value is a
    dict holding ``geneticCode`` and/or ``mitochondrialGeneticCode`` when
    those keys exist in ``d``; it is an empty dict when neither exists.
    """
    codes = {}
    for field in ('geneticCode', 'mitochondrialGeneticCode'):
        if field in d:
            codes[field] = d[field]
    return {'geneticCodes': codes}
# Run the command-line entry point only when this file is executed as a
# script, so importing the module does not trigger network requests.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment