Commit 61c16030 authored by hmueller's avatar hmueller
Browse files

Include fasta header conversion and restoration steps in targetp module scripts.

parent a2f733d3
......@@ -3,8 +3,10 @@
import sys
import json
import argparse
from os import path
import subprocess
# Command-line interface of the targetp-to-JSON converter.
# This first parser object is rebound by the very next assignment, so its
# description text never reaches the user.
parser = argparse.ArgumentParser(description='Convert targetp results to json documents')
# Effective parser: identical except that the description uses the singular "json document".
parser = argparse.ArgumentParser(description='Convert targetp results to json document')
# --result / -r (required): directory holding the targetp results.
parser.add_argument('--result', '-r', required=True, help='The targetp results directory')
# --output / -o (required): path of the JSON file to write.
parser.add_argument('--output', '-o', required=True, help='The converted results json file')
# Parse the process arguments; argparse prints usage and exits if a required option is missing.
args = parser.parse_args()
......@@ -53,3 +55,7 @@ output_filename = args.output
# Write the converted documents to the requested JSON file.
with open(output_filename, 'w') as o:
    json.dump(documents, o)

# Replace sequences' enumerated ids with their original ids.
# The helper script is looked up next to this script. abspath() guards against
# a relative or bare __file__, for which dirname() returns '' and plain string
# concatenation would yield a path at the filesystem root.
restore_seq_ids_tool = path.join(path.dirname(path.abspath(__file__)),
                                 'restore_seq_id_from_enumeration.py')
# The id mapping is read from enum_headers.tsv inside the targetp results directory.
enum_headers_file = path.join(args.result, 'enum_headers.tsv')
# check=True: raise CalledProcessError instead of silently leaving enumerated
# ids in the JSON output when the helper fails.
subprocess.run([restore_seq_ids_tool, '-j', output_filename, '-e', enum_headers_file],
               check=True)
......@@ -3,19 +3,24 @@
import env
import argparse
from psot import config
from os import system,makedirs
from os import system,makedirs,path
import subprocess
# Executable used to run targetp: read from the 'tools' section of the loaded
# psot configuration, falling back to the bare command name 'targetp'.
targetp_tool = config.load_config()['tools'].get('targetp', 'targetp')
# Maps each accepted --organism_group value to the flag handed to targetp.
org_flags = {'plant': '-P', 'non-plant': '-N'}
# Command-line interface, first variant (spaces around '=' in keyword arguments).
# The parser is rebuilt a few lines further down, so these definitions are discarded.
parser = argparse.ArgumentParser(description = 'Determine subcellular locations of eukaryotic amino acid sequences')
parser.add_argument('--fasta', '-f', required = True, help = 'A fasta file with amino acid sequences')
parser.add_argument('--organism_group', choices = org_flags.keys(), required = True, help = 'Define wether to use plant/non-plant networks')
parser.add_argument('--output', required = True, help = 'The results directory. Will contain results.txt and enum_headers.tsv.')
# Effective parser: same options, keyword arguments written without spaces around '='.
parser = argparse.ArgumentParser(description='Determine subcellular locations of eukaryotic amino acid sequences')
# --fasta / -f (required): input fasta file with amino acid sequences.
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with amino acid sequences')
# --organism_group (required): restricted to the keys of org_flags.
# NOTE(review): the help text spells "wether"; it should read "whether".
parser.add_argument('--organism_group', choices=org_flags.keys(), required=True, help='Define wether to use plant/non-plant networks')
# --output (required): directory that receives the result files.
parser.add_argument('--output', required=True, help='The results directory. Will contain results.txt and enum_headers.tsv.')
# Parse the process arguments; argparse prints usage and exits on missing or invalid options.
args = parser.parse_args()
# Local import: shlex is only needed to split the configured targetp command.
import shlex

# Create the results directory up front (no error if it already exists).
makedirs(args.output, exist_ok=True)

# Swap fasta headers for unique numbers to avoid truncation.
# The helper script is looked up next to this script. abspath() guards against
# a relative or bare __file__, for which dirname() returns '' and plain string
# concatenation would yield a path at the filesystem root.
reduce_headers_tool = path.join(path.dirname(path.abspath(__file__)),
                                'reduce_fasta_headers_to_enumeration.py')
# check=True: stop here if the header reduction fails rather than running
# targetp on input whose header mapping was never written.
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-d", args.output], check=True)

results_file = path.join(args.output, 'results.txt')
# Run targetp with an argument list instead of a shell command string, so
# fasta/output paths containing spaces or shell metacharacters are passed
# through verbatim. The configured tool entry is split shell-style so that a
# config value carrying extra arguments keeps working.
# NOTE(review): targetp still reads args.fasta; confirm that the header
# reduction rewrites that file in place, otherwise the enumerated fasta it
# produces should be passed here instead.
targetp_command = shlex.split(targetp_tool) + [org_flags[args.organism_group], args.fasta]
with open(results_file, 'w') as results:
    # targetp's stdout becomes results.txt; a non-zero exit raises CalledProcessError.
    subprocess.run(targetp_command, stdout=results, check=True)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment