Commit 05fd6a68 authored by hmueller
Browse files

Add script to restore original seq ids in json results file from enumeration.

parent 045ac89e
import json
import argparse
def _build_parser():
    """Create the command-line parser for the id-restoration script."""
    parser = argparse.ArgumentParser(
        description='Substitute enumerated id of sequence with original identifier')
    parser.add_argument('--json', '-j', required=True, help='The results json file')
    parser.add_argument('--enum-headers', '-e', required=True,
                        help='The enumerated original headers in tsv format')
    return parser


def _read_enumerated_headers(path):
    """Return a dict mapping each enumerated id (as str) to its original header.

    Every non-blank line must hold an enumerated id and a header separated
    by a tab. Only the first tab is treated as the separator, so headers
    that contain tabs themselves are kept intact. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line contains no tab-separated header.
    """
    headers = {}
    with open(path) as handle:
        for line_no, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line:
                # Tolerate empty lines (e.g. a trailing newline at end of file).
                continue
            num, sep, header = line.partition('\t')
            if not sep:
                raise ValueError(
                    f"{path}: line {line_no}: expected '<id><TAB><header>', got {line!r}")
            headers[num] = header
    return headers


def _original_id(enum_id, headers):
    """Return the first whitespace-delimited token of the header for *enum_id*.

    The id is converted with ``str`` before the lookup because the keys read
    from the headers file are always strings, while the JSON value may have
    been stored as a number.

    Raises:
        KeyError: if *enum_id* has no entry in *headers*.
    """
    key = str(enum_id)
    try:
        header = headers[key]
    except KeyError:
        raise KeyError(f"id {key!r} not found in enumerated headers") from None
    return header.split()[0]


def main(argv=None):
    """Replace the enumerated ``id`` in the results json with the original one.

    Reads the json file given by ``--json``, looks its ``"id"`` value up in
    the tsv file given by ``--enum-headers`` and writes the json back to the
    same path with ``"id"`` set to the first token of the original header.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)

    with open(args.json, encoding='utf-8') as handle:
        documents = json.load(handle)

    headers = _read_enumerated_headers(args.enum_headers)
    # Resolve the new id before reopening the file for writing, so a failed
    # lookup cannot leave a truncated results file behind.
    documents["id"] = _original_id(documents["id"], headers)

    with open(args.json, 'w', encoding='utf-8') as handle:
        json.dump(documents, handle)


if __name__ == '__main__':
    main()
