Commit 045ac89e authored by hmueller
Browse files

Change filename and manipulate file in place instead of creating a new one.

parent 42345e6a
import os
import argparse
import fileinput
def enumerate_fasta_headers(fasta, headers_dict_file='header_dictionary.tsv'):
    """Replace every FASTA header with a running number, editing the file in place.

    Each line starting with ``>`` is rewritten as ``>N`` where N counts
    headers from 1 in file order. All other lines are written back unchanged.
    The original header text (without the leading ``>`` and surrounding
    whitespace) is stored in a tab-separated file, one ``N<TAB>header`` row
    per header.

    Args:
        fasta: Path of the FASTA file to rewrite in place.
        headers_dict_file: Path of the TSV file that receives the
            number-to-original-header mapping.

    Returns:
        Dict mapping each assigned number to the original header text.
    """
    headers_dict = {}
    num = 1
    # With inplace=True, fileinput redirects stdout into the file being read,
    # so every print() below writes a line of the rewritten FASTA file.
    with fileinput.FileInput(fasta, inplace=True) as f:
        for line in f:
            if line.startswith(">"):
                headers_dict[num] = line.strip().lstrip('>')
                # Emit the numeric identifier instead of the original header;
                # the original stays recoverable through the TSV mapping.
                line = ">{}\n".format(num)
                num += 1
            print(line, end='')

    with open(headers_dict_file, 'w') as o:
        for key, header in headers_dict.items():
            o.write("{}\t{}\n".format(key, header))
    return headers_dict


def main():
    """Parse the command line and enumerate the headers of the given FASTA file."""
    parser = argparse.ArgumentParser(description='Replaces fasta headers with unique numbers and stores original headers in tsv file')
    parser.add_argument('--fasta', '-f', required=True, help='The fasta file')
    args = parser.parse_args()
    enumerate_fasta_headers(args.fasta)


if __name__ == "__main__":
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment