Commit 6f1bf564 authored by hmueller
Browse files

Include fasta header conversion and restoration steps in ghostx module scripts.

parent 05fd6a68
#!/usr/bin/python3
import sys
import json
import argparse
from os import path
import subprocess
parser = argparse.ArgumentParser(description='Convert ghostx results to json documents')
parser.add_argument('--result', '-r', required=True, help='The ghostx result directory')
......@@ -55,3 +58,6 @@ with open(result_filename) as f:
output_filename = args.output
with open(output_filename, 'w') as o:
json.dump(documents, o)
restore_seq_ids_tool = path.dirname(__file__) + '/restore_seq_id_from_enumeration.py'
subprocess.run([restore_seq_ids_tool, '-j', output_filename, '-e', args.result + '/enum_headers.tsv'])
......@@ -2,9 +2,11 @@
import argparse
import fileinput
import os
parser = argparse.ArgumentParser(description='Replaces fasta headers with unique numbers and stores original headers in tsv file')
parser = argparse.ArgumentParser(description='Replaces fasta headers with unique numbers and saves both in tsv format')
parser.add_argument('--fasta', '-f', required=True, help='The fasta file')
parser.add_argument('--enum-headers-dir', '-d', default='.', help='Target directory for headers file enum_headers.tsv')
args = parser.parse_args()
fasta = args.fasta
......@@ -21,8 +23,7 @@ with fileinput.FileInput(fasta, inplace=True) as f:
else:
print(line, end='')
headers_dict_file = 'enum_header_dict.tsv'
with open(headers_dict_file, 'w') as o:
enum_headers_file = args.enum_headers_dir + '/enum_headers.tsv'
with open(enum_headers_file, 'w') as o:
for key in headers_dict:
o.write("{}\t{}\n".format(key, headers_dict[key]))
......@@ -8,18 +8,24 @@ parser.add_argument('--json', '-j', required=True, help='The results json file')
parser.add_argument('--enum-headers', '-e', required=True, help='The enumerated original headers in tsv format')
args = parser.parse_args()
header_dict = {}
documents = {}
seq_id_dict = {}
docs_enumerated = {}
with open(args.json) as j:
documents = json.load(j)
docs_enumerated = json.load(j)
with open(args.enum_headers) as h:
for line in h:
num, header = line.strip().split('\t')
header_dict[num] = header
seq_id_dict[num] = header.split()[0]
documents["id"] = header_dict[documents["id"]].split()[0]
documents_restored = {}
for num in docs_enumerated:
seq_id = seq_id_dict[num]
doc = docs_enumerated[num]
doc["id"] = seq_id
documents_restored[seq_id] = doc
with open(args.json, 'w') as o:
json.dump(documents, o)
json.dump(documents_restored, o)
#!/usr/bin/env python3
import env
import argparse
import re
from os import system,makedirs
from os import system,makedirs,path
from psot import config
import subprocess
import json
......@@ -15,6 +16,11 @@ parser.add_argument('--database', '-d', required=True, help='Database to search
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json and results.tsv.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
reduce_headers_tool = path.dirname(__file__) + '/reduce_fasta_headers_to_enumeration.py'
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-d", args.output])
# Approach:
# directory for output
# info.json -> Tool info
......@@ -26,11 +32,10 @@ toolconfig = {
}
# find version
output = subprocess.run([ghostx_tool], stderr=subprocess.PIPE)
text =output.stderr.decode('ascii')
text = output.stderr.decode('ascii')
result = re.search('version (.*)', text)
toolconfig['version'] = result.group(1)
makedirs(args.output, exist_ok=True)
with open(args.output + '/info.json', 'w') as f:
json.dump(toolconfig, f)
system(ghostx_tool + " aln -d " + args.database + " -o " + args.output + "/results.tsv -i " + args.fasta)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment