Commit ca4ffd6a authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Added blast vs swissprot tool and converter

parent b8878d71
#!/usr/bin/python3
import sys
import json
filename = sys.argv[1]
documents = {}
with open(filename) as f:
header = {}
for line in f:
line = line.strip()
if line.startswith('#'):
if line.startswith('# Fields:'):
header_entries = line.replace('# Fields: ','').split(', ')
header = {}
for idx, key in enumerate(header_entries):
header[key] = idx
else:
split = line.split("\t")
if not split[0] in documents:
documents[split[0]] = {"id": split[0], "results": []}
result = {}
result["dbxref"] = "UniProtKB/Swiss-Prot:"+split[header['subject id']].split("|")[1]
if '% identity' in header:
result["percent_identity"] = float(split[header['% identity']])
if 'q. start' in header and 'q. end' in header:
result['qloc'] = split[header['q. start']] + '-' + split[header['q. end']]
if 's. start' in header and 's. end' in header:
result['sloc'] = split[header['s. start']] + '-' + split[header['s. end']]
if 'evalue' in header:
result['evalue'] = float(split[header['evalue']])
if 'BTOP' in header:
result['btop'] = split[header['BTOP']]
documents[split[0]]["results"].append(result)
for key in documents:
print(json.dumps(documents[key]))
#!/usr/bin/env python3
import argparse
import yaml
import os
from os import system
def get_config_file():
"""Finds the config file, that is located in the directory above script"""
script_path = os.path.realpath(__file__)
script_dir = os.path.dirname(script_path)
config_dir = os.path.dirname(script_dir)
config_file = os.path.join(config_dir, 'config.yaml')
return config_file
def get_blastp_tool():
"""Extracts the configuration entry for blastp. If it is not present, it defaults to blastp"""
tool = None
with open(get_config_file()) as f:
config = yaml.load(f)
tool = config['tools'].get('blastp', 'blastp')
return tool
parser = argparse.ArgumentParser(description='Identify homologues in the swissprot database')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
parser.add_argument('--evalue', '-e', default='0.0001', help='Evalue cutoff')
parser.add_argument('--alignment', '-a', action='store_true', help='Include alignments in btop format')
args = parser.parse_args()
format = '7 qseqid sseqid pident qstart qend sstart send evalue bitscore'
if args.alignment:
format += ' btop'
system(get_blastp_tool() + " -db /vol/biodb/uniprot/uniprot_sprot.fasta -num_threads 7 -outfmt '" + format + "' -out - -query " + args.fasta)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment