Skip to content
Snippets Groups Projects
Commit 42345e6a authored by hmueller's avatar hmueller
Browse files

Add script for exchange of fasta headers with enumeration.

parent f67b0373
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/python3
import os
import argparse
# Name of the tab-separated table mapping each number to its original header.
# It is written to the current working directory.
HEADERS_DICT_FILE = 'header_dictionary.tsv'


def _enumerated_name(fasta_path):
    """Return the output file name for *fasta_path*.

    The directory part is dropped and 'enumerated' is inserted before the
    last extension: 'dir/reads.fasta' -> 'reads.enumerated.fasta'.
    A name without any dot simply gets '.enumerated' appended.
    """
    stem, dot, extension = os.path.basename(fasta_path).rpartition('.')
    if not dot:
        # rpartition puts the whole name into the last slot when no dot exists.
        return "{}.enumerated".format(extension)
    return "{}.enumerated.{}".format(stem, extension)


def _enumerate_headers(fasta_in, fasta_out, headers_dict_file):
    """Copy *fasta_in* to *fasta_out*, replacing each header by a running number.

    Every line starting with '>' is replaced by '>N' (N counting from 1) and
    a line 'N<TAB>original header' is appended to *headers_dict_file*.
    All other lines are copied unchanged.

    Returns the number of headers that were replaced.
    """
    identifier = 0
    # The input is opened first so that a missing input file does not leave
    # empty output files behind; the context manager closes every handle even
    # if reading or writing fails halfway through.
    with open(fasta_in) as source, \
            open(fasta_out, 'w') as target, \
            open(headers_dict_file, 'w') as table:
        for line in source:
            if line.startswith(">"):
                identifier += 1
                # Drop only the single leading '>' marker and trailing
                # whitespace, so the stored header stays faithful to the input.
                header = line.rstrip()[1:]
                target.write(">{}\n".format(identifier))
                table.write("{}\t{}\n".format(identifier, header))
            else:
                target.write(line)
    return identifier


def main(argv=None):
    """Parse the command line and enumerate the headers of the given fasta file.

    *argv* is the list of command-line arguments; None makes argparse read
    them from sys.argv. The enumerated fasta and the header dictionary are
    both written to the current working directory.
    """
    parser = argparse.ArgumentParser(description='Reduce fasta headers to unique number and keep a file with original headers')
    parser.add_argument('--fasta', '-f', required=True, help='The fasta file')
    args = parser.parse_args(argv)
    _enumerate_headers(args.fasta, _enumerated_name(args.fasta), HEADERS_DICT_FILE)


if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment