Commit 100b4c88 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Added module for ghostx vs swissprot

parent 623c7cd7
#!/usr/bin/python3
import sys
import json
import argparse
def build_documents(lines, tool):
    """Group tab-separated ghostx hits by query id into json-ready documents.

    Each non-blank line is split on tabs and the following columns (0-based)
    are read: 0 = query id, 1 = subject id whose second ``|``-separated field
    is used as the Swiss-Prot accession, 2 = percent identity, 6-7 = query
    range, 8-9 = subject range, 10 = e-value.
    NOTE(review): this looks like BLAST-style tabular output - confirm
    against the ghostx documentation.

    Args:
        lines: iterable of result lines (e.g. an open ``results.tsv`` file).
        tool: the tool description loaded from ``info.json``; it is attached
            to every document as-is.

    Returns:
        dict mapping each query id to
        ``{"id": ..., "computations": [{"tool": tool, "results": [...]}]}``.
    """
    documents = {}
    for line in lines:
        line = line.strip()
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no hit; without this guard they would create an empty-id document
        # and then fail on the missing columns.
        if not line:
            continue
        fields = line.split("\t")
        query_id = fields[0]
        if query_id not in documents:
            documents[query_id] = {
                "id": query_id,
                "computations": [{'tool': tool, 'results': []}],
            }
        results = documents[query_id]['computations'][0]['results']
        results.append({
            "dbxref": "UniProtKB/Swiss-Prot:" + fields[1].split("|")[1],
            "percent_identity": float(fields[2]),
            'qloc': fields[6] + '-' + fields[7],
            'sloc': fields[8] + '-' + fields[9],
            'evalue': float(fields[10]),
        })
    return documents


def main(argv=None):
    """Convert a ghostx result directory into a single json file.

    Reads ``info.json`` and ``results.tsv`` from the ``--result`` directory
    and writes the documents built by ``build_documents`` to ``--output``.

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]``.
    """
    import os

    parser = argparse.ArgumentParser(description='Convert ghostx results to json documents')
    parser.add_argument('--result', '-r', required=True, help='The ghostx result directory')
    parser.add_argument('--output', '-o', required=True, help='The converted results json file')
    args = parser.parse_args(argv)

    info_filename = os.path.join(args.result, "info.json")
    result_filename = os.path.join(args.result, "results.tsv")

    # read tool info
    with open(info_filename) as f:
        tool = json.load(f)

    with open(result_filename) as f:
        documents = build_documents(f, tool)

    with open(args.output, 'w') as o:
        json.dump(documents, o)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
import argparse
import config
import re
from os import system,makedirs
import subprocess
import json
def parse_ghostx_version(text):
    """Return the version string found in ghostx's usage text, or None.

    The version is whatever follows the first ``version `` on its line,
    with surrounding whitespace removed.
    """
    match = re.search(r'version (.*)', text)
    if match is None:
        return None
    return match.group(1).strip()


def main(argv=None):
    """Run ghostx on a fasta file and record the tool info next to the result.

    Approach:
      * create the output directory
      * info.json   -> tool info (name, database, version)
      * results.tsv -> raw ghostx result

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: if the ghostx version cannot be determined, or with the
            exit status of ghostx when the alignment run fails.
    """
    ghostx_tool = config.load_config()['tools'].get('ghostx', 'ghostx')

    parser = argparse.ArgumentParser(description='Identify homologues in the swissprot database')
    parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
    parser.add_argument('--database', '-d', required=True, help='Database to search in')
    parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json and results.tsv.')
    args = parser.parse_args(argv)

    toolconfig = {
        'name': 'ghostx',
        'database': args.database
    }

    # find version: ghostx called without arguments writes text containing
    # "version ..." to stderr, which is captured here.
    probe = subprocess.run([ghostx_tool], stderr=subprocess.PIPE)
    # errors='replace' keeps a stray non-ASCII byte in the banner from
    # aborting the whole analysis.
    version = parse_ghostx_version(probe.stderr.decode('ascii', errors='replace'))
    if version is None:
        raise SystemExit("Could not determine the ghostx version from the output of " + ghostx_tool)
    toolconfig['version'] = version

    makedirs(args.output, exist_ok=True)
    with open(args.output + '/info.json', 'w') as f:
        json.dump(toolconfig, f)

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and cannot be used to inject commands.
    completed = subprocess.run([
        ghostx_tool, "aln",
        "-d", args.database,
        "-o", args.output + "/results.tsv",
        "-i", args.fasta,
    ])
    # Propagate a failed ghostx run instead of silently reporting success.
    if completed.returncode != 0:
        raise SystemExit(completed.returncode)


if __name__ == "__main__":
    main()
tools:
signalp: '/vol/biotools/bin/signalp'
ghostx: '/vol/biotools/bin/ghostx'
profiles:
- name: 'fast'
info: 'Profile that contains tools that give a fast result'
modules:
ghostx_swissprot:
signalp:
organism: 'euk'
- name: 'common'
......
# Module manifest for the ghostx against swissprot analysis
# The name of the module. Is needed for the list-analyses option, for custom
# configurations and custom profiles
name: 'ghostx_swissprot'
# Short description of the analysis
info: 'ghostx analysis against swissprot'
# The name of the script for the analysis step. Must take a --fasta parameter
analysis_script: 'run_ghostx.py'
# The name of the result to json converter script. Must take a --result
# parameter (the result directory written by the analysis_script) and an
# --output parameter (the json file to write)
converter_script: 'convert_ghostx.py'
parameters:
database: '/vol/biodb/ghostx/uniprot_sprot'
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment