Commit 4cb6dfef authored by Lukas Jelonek
Browse files

Fix bug: uniprotkb version is not parsed correctly

parent a32b61fc
......@@ -14,7 +14,7 @@ args = parser.parse_args()
url_prefix="ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/"
data = {
'swissprot': {
'description': 'UniProt/Swiss-Prot',
'description': 'UniProtKB/Swiss-Prot',
'fileprefix': 'uniprot_sprot',
'types': {
'xml': '.xml.gz',
......@@ -23,7 +23,7 @@ data = {
}
},
'trembl': {
'description': 'UniProt/TrEMBL',
'description': 'UniProtKB/TrEMBL',
'fileprefix': 'uniprot_trembl',
'types': {
'xml': '.xml.gz',
......@@ -35,22 +35,19 @@ data = {
# Configuration (description, file prefix, file types) of the database
# selected on the command line.
entry = data[args.database]
# NOTE(review): dbname is assigned twice in a row; only the second value
# (the plain database name, without the type suffix) survives.
dbname = args.database + '_' + args.type
dbname = args.database
# NOTE(review): fixed to Swiss-Prot regardless of args.database -- confirm
# whether entry['description'] was intended here.
dbdescription = "UniProtKB/Swiss-Prot"
# Download URL: release directory + file prefix + extension of the chosen type.
url = url_prefix + entry['fileprefix'] + entry['types'][args.type]
# reldate.txt in the same directory is the file the release version is read from.
version_url = url_prefix + "reldate.txt"
# the release version and date are read from the reldate.txt file
# downloaded from version_url
def extract_version_info(prefix, file):
    """Extract the release version and date of one database from a text file.

    Every line of ``file`` is searched for text of the form
    ``<prefix> Release <version> of <date>``, where ``<version>`` consists
    of two groups of digits joined by an underscore (e.g. ``2019_02``).

    Args:
        prefix: Database description that precedes the word "Release",
            e.g. ``'UniProtKB/Swiss-Prot'``. It is matched literally.
        file: Path of the text file to scan.

    Returns:
        A dict with the keys ``'version'`` and ``'release_date'`` taken from
        the last matching line, or an empty dict if no line matches.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape.
    # re.escape: characters in the prefix must not be read as regex syntax.
    # Compiled once here instead of being rebuilt for every line.
    pattern = re.compile(re.escape(prefix) + r" Release (\d+_\d+) of (.+)")
    info = {}
    with open(file) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                info['version'] = match.group(1)
                info['release_date'] = match.group(2)
    return info
# download archive
......@@ -58,7 +55,7 @@ fn = h.download(url)
ver_fn = h.download(version_url)
# write metadata file
other = extract_version_info(ver_fn)
other = extract_version_info(entry['description'], ver_fn)
other['parts'] = [
{
'files': [fn],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment