#!/usr/bin/env python3
import dbman.helper as h
import json
import os
import gzip
import urllib.request
from bs4 import BeautifulSoup
import datetime
import logging
# Download recipe for VFDB: determine the release date published on the
# download page, fetch every archive, and record the result as metadata.
dbname = "vfdb"
dbdescription = "The virulence factor database"
# NOTE(review): the base URL is empty in this revision, so every request below
# fails with "unknown url type". Set it to the VFDB site root *including* the
# trailing slash, because relative paths are appended verbatim -- confirm the
# correct address before running.
url = ""

# get version
# Use a context manager so the HTTP response is closed deterministically.
with urllib.request.urlopen(url + "download.htm") as response:
    data = response.read()
# Name the parser explicitly; otherwise bs4 picks whichever parser happens to
# be installed (and warns), which makes the result environment-dependent.
bs = BeautifulSoup(data, "html.parser")
# The page carries the release stamp in an <i> element such as
# "Last update: Fri Mar  3 10:00:00 2017". An IndexError here means the page
# layout changed and no such element was found.
version = [s.getText() for s in bs.find_all('i') if 'Last update' in s.getText()][0]
# strip() guards strptime against stray whitespace around the stamp.
version = version.replace('Last update: ', "").strip()
version = datetime.datetime.strptime(version, "%a %b %d %H:%M:%S %Y").date().isoformat()

# download archives
# Each entry pairs a path relative to `url` with the tags describing it.
files = [
    {'file': 'Down/VFs.xls.gz', 'tags': ['excel', 'gzip', 'compressed']},
    {'file': 'Down/Comparative_tables_from_VFDB.tar.gz', 'tags': ['excel', 'tar', 'gzip', 'compressed']},
    {'file': 'Down/VFDB_setA_nt.fas.gz', 'tags': ['core', 'fasta', 'nucleotide', 'gzip', 'compressed']},
    {'file': 'Down/VFDB_setA_pro.fas.gz', 'tags': ['core', 'fasta', 'protein', 'gzip', 'compressed']},
    {'file': 'Down/VFDB_setB_nt.fas.gz', 'tags': ['full', 'fasta', 'nucleotide', 'gzip', 'compressed']},
    {'file': 'Down/VFDB_setB_pro.fas.gz', 'tags': ['full', 'fasta', 'protein', 'gzip', 'compressed']},
]
parts = []
for entry in files:
    # NOTE(review): the original download call on this line was garbled (only
    # "+ file['file'])" survived). It is reconstructed here with the standard
    # library, saving into the current working directory under the archive's
    # base name. If dbman.helper offers its own download function, it was
    # presumably what was used here -- confirm and switch back if so.
    fn, _headers = urllib.request.urlretrieve(
        url + entry['file'], os.path.basename(entry['file'])
    )
    # Collect the part; previously it was built but never added to `parts`,
    # so the metadata always listed no files.
    parts.append({'files': [fn], 'tags': entry['tags']})

# write metadata file
h.create_metadata(dbname, "download", dbdescription, version=version, other={'parts': parts})
