Commit 05876ca4 authored by jkeller
Browse files

Added documentation and fixed minor bugs and formatting

parent abcff229
......@@ -10,6 +10,7 @@ logger = logging.getLogger(__name__)
def main():
"""main function() for script usage"""
parser = argparse.ArgumentParser(description="Retrieve protein orthology documents from EggNog and convert them "
"into json")
parser.add_argument("--domain", "-d", action="store_true", help="Include a list of PFAM and SMART domains in the "
......@@ -33,6 +34,7 @@ def main():
def retrieve(dbxrefs, domain, go_terms, tree):
"""Retrieves data from EggNOG database as json and parses into dbxrefs json format"""
resolved = dbxref.resolver.resolve(dbxrefs, check_existence=False)
documents = []
for entry in resolved:
......@@ -52,15 +54,13 @@ def retrieve(dbxrefs, domain, go_terms, tree):
for pfam_entry in domain_json["domains"]["PFAM"]:
pfam_list.append({"protein": pfam_entry[0],
"1": pfam_entry[1],
"2": pfam_entry[2],
"3": pfam_entry[3]
"sequence count": pfam_entry[1],
"frequency": pfam_entry[2],
for smart_entry in domain_json["domains"]["SMART"]:
smart_list.append({"protein": smart_entry[0],
"1": smart_entry[1],
"2": smart_entry[2],
"3": smart_entry[3],
"sequence count": smart_entry[1],
"frequency": smart_entry[2]
except KeyError:
......@@ -86,25 +86,22 @@ def retrieve(dbxrefs, domain, go_terms, tree):
for mol_func_entry in go_term_json["go_terms"]["Molecular Function"]:
mol_func.update({mol_func_entry[0]: {"function:": mol_func_entry[1],
"GO evidence code": mol_func_entry[2],
"3": mol_func_entry[3],
"4": mol_func_entry[4],
"5": mol_func_entry[5]
"sequence count": mol_func_entry[3],
"frequency": mol_func_entry[4]
for bio_pro_entry in go_term_json["go_terms"]["Biological Process"]:
bio_pro.update({bio_pro_entry[0]: {"process": bio_pro_entry[1],
"GO evidence code": bio_pro_entry[2],
"3": bio_pro_entry[3],
"4": bio_pro_entry[4],
"5": bio_pro_entry[5]
"sequence count": bio_pro_entry[3],
"frequency": bio_pro_entry[4]
for cell_comp_entry in go_term_json["go_terms"]["Cellular Component"]:
cell_comp.update({cell_comp_entry[0]: {"component": cell_comp_entry[1],
"GO evidence code": cell_comp_entry[2],
"3": cell_comp_entry[3],
"4": cell_comp_entry[4],
"5": cell_comp_entry[5]
"sequence count": cell_comp_entry[3],
"frequency": cell_comp_entry[4]
except KeyError:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment