Commit ad087cc8 authored by hmueller
Browse files

Define enumerated header filename in run scripts and pass it to header...

Define enumerated header filename in run scripts and pass it to header enumeration script instead of passing result directory.
parent 610ed0d9
......@@ -5,7 +5,7 @@ import fileinput
parser = argparse.ArgumentParser(description='Replaces fasta headers with unique numbers and saves a dictionary of both in tsv format. Caution: The original fasta file gets replaced in the process.')
parser.add_argument('--fasta', '-f', required=True, help='The fasta file')
parser.add_argument('--enum-headers-dir', '-d', default='.', help='Target directory for headers dictionary file enum_headers.tsv')
parser.add_argument('--enum-headers', '-e', required=True, help='File to store enumerated headers in tsv format')
args = parser.parse_args()
fasta = args.fasta
......@@ -22,7 +22,7 @@ with fileinput.FileInput(fasta, inplace=True) as f:
else:
print(line, end='')
enum_headers_file = args.enum_headers_dir + '/enum_headers.tsv'
enum_headers_file = args.enum_headers
with open(enum_headers_file, 'w') as o:
for key in headers_dict:
......
......@@ -13,14 +13,14 @@ ghostx_tool = config.load_config()['tools'].get('ghostx', 'ghostx')
parser = argparse.ArgumentParser(description='Identify homologues in the swissprot database')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
parser.add_argument('--database', '-d', required=True, help='Database to search in')
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json and results.tsv.')
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json, results.tsv and enum_headers.tsv.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
# Swap fasta headers for unique numbers to save ghostx from dealing with complex headers
reduce_headers_tool = path.dirname(__file__) + '/reduce_fasta_headers_to_enumeration.py'
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-d", args.output])
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-e", args.output + '/enum_headers.tsv'])
# Approach:
# directory for output
......
......@@ -13,14 +13,14 @@ org_flags = {'plant': '-P', 'non-plant': '-N'}
parser = argparse.ArgumentParser(description='Determine subcellular locations of eukaryotic amino acid sequences')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with amino acid sequences')
parser.add_argument('--organism_group', choices=org_flags.keys(), required=True, help='Define wether to use plant/non-plant networks')
parser.add_argument('--output', required=True, help='The results directory. Will contain results.txt and enum_headers.tsv.')
parser.add_argument('--output', required=True, help='The result directory. Will contain results.txt and enum_headers.tsv.')
args = parser.parse_args()
makedirs(args.output, exist_ok=True)
# Swap fasta headers for unique numbers to avoid truncation
reduce_headers_tool = path.dirname(__file__) + '/reduce_fasta_headers_to_enumeration.py'
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-d", args.output])
subprocess.run([reduce_headers_tool, "-f", args.fasta, "-e", args.output + '/enum_headers.tsv'])
results_file = args.output + '/results.txt'
system(targetp_tool + " " + org_flags[args.organism_group] + " " + args.fasta + " > " + results_file)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment