Commit 42345e6a authored by hmueller
Browse files

Add script for exchange of fasta headers with enumeration.

parent f67b0373
#!/usr/bin/python3
import os
import argparse
# Name of the tab-separated table mapping each number to its original header.
HEADERS_DICT_FILE = 'header_dictionary.tsv'


def enumerated_name(fasta_path):
    """Return the output file name for *fasta_path*.

    The directory part is dropped and ``enumerated`` is inserted in front of
    the last extension (``reads.fasta`` -> ``reads.enumerated.fasta``).  A
    name without any ``.`` simply gets ``.enumerated`` appended instead of
    failing on the missing extension.
    """
    name = os.path.basename(fasta_path)
    stem, dot, extension = name.rpartition('.')
    if not dot:
        # No extension to keep at the end of the name.
        return "{}.enumerated".format(name)
    return "{}.enumerated.{}".format(stem, extension)


def enumerate_headers(fasta_in, fasta_out, headers_dict_file=HEADERS_DICT_FILE):
    """Replace every fasta header by a running number.

    Reads *fasta_in* line by line.  Each line starting with ``>`` is written
    to *fasta_out* as ``>N`` (N counting from 1) and recorded in
    *headers_dict_file* as ``N<TAB>original header``; every other line is
    copied unchanged.

    Returns the number of headers that were replaced.
    """
    count = 0
    # The input is opened first so that a missing or unreadable fasta file
    # does not leave empty output files behind; the with-statement closes all
    # three handles even when an error occurs half-way through.
    with open(fasta_in) as source, \
            open(fasta_out, 'w') as target, \
            open(headers_dict_file, 'w') as table:
        for line in source:
            if line.startswith(">"):
                count += 1
                # Drop only the single leading marker so that the stored
                # header is the original text, even if it contains '>' itself.
                header = line[1:].rstrip()
                target.write(">{}\n".format(count))
                table.write("{}\t{}\n".format(count, header))
            else:
                target.write(line)
    return count


def main():
    """Parse the command line and enumerate the headers of the given file."""
    parser = argparse.ArgumentParser(description='Reduce fasta headers to unique number and keep a file with original headers')
    parser.add_argument('--fasta', '-f', required=True, help='The fasta file')
    args = parser.parse_args()
    enumerate_headers(args.fasta, enumerated_name(args.fasta))


if __name__ == "__main__":
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment