Commit 7a52f072 authored by Lukas Jelonek
Browse files

Fix bug: hmmer and ghostx don't include sequences with no results

parent cda62f3e
......@@ -4,6 +4,7 @@ import sys
import json
import argparse
from os import path
import glob
import subprocess
parser = argparse.ArgumentParser(description='Convert ghostx results to json documents')
......@@ -55,6 +56,20 @@ with open(result_filename) as f:
results.append(result)
# add entries for files with no results
def read_query_ids(pattern):
    """Return the query ids listed in every file matching *pattern*.

    Each non-empty line contributes its first whitespace-separated token,
    with any leading '>' characters removed. Blank lines and lines that
    consist only of '>' are skipped instead of raising IndexError or
    producing an empty id.

    Args:
        pattern: glob pattern selecting the id files to read.

    Returns:
        A list of ids in the order they were read; duplicates are kept.
    """
    ids = []
    for filename in glob.glob(pattern):
        with open(filename) as f:
            for line in f:
                # Strip the leading '>' before splitting so that both
                # ">id description" and "id ..." yield "id".
                fields = line.lstrip('>').split()
                if fields:
                    ids.append(fields[0])
    return ids


# Deliberately not called `path`: this script imports `path` from os, and
# rebinding it to a string would shadow that module.
ids_pattern = "*.ids"
queries = read_query_ids(ids_pattern)
# Give every query that has no document yet a placeholder entry whose
# computation for this tool carries an empty result list.
for query_id in queries:
    documents.setdefault(
        query_id,
        {"id": query_id, "computations": [{'tool': tool, 'results': []}]},
    )

# Write the complete document mapping as JSON to the requested output file.
output_filename = args.output
with open(output_filename, 'w') as out_handle:
    json.dump(documents, out_handle)
......
......@@ -12,12 +12,14 @@ args = parser.parse_args()
# Provide a list of all query sequence names for conversion process
def read_query_ids(pattern):
    """Return the query ids listed in every file matching *pattern*.

    Each non-empty line contributes its first whitespace-separated token,
    with any leading '>' characters removed. Blank lines and lines that
    consist only of '>' are skipped instead of raising IndexError or
    producing an empty id.

    Args:
        pattern: glob pattern selecting the id files to read.

    Returns:
        A list of ids in the order they were read; duplicates are kept.
    """
    ids = []
    for filename in glob.glob(pattern):
        with open(filename) as f:
            for line in f:
                # Strip the leading '>' before splitting so that both
                # ">id description" and "id ..." yield "id".
                fields = line.lstrip('>').split()
                if fields:
                    ids.append(fields[0])
    return ids


# The earlier "*_enum_headers.tsv" pattern was overwritten before use,
# so only the "*.ids" pattern is kept.
path = "*.ids"
queries = read_query_ids(path)
# Location of the domtblout.tsv file below args.result
# (presumably the tool's result directory; confirm against the argparse setup).
filename = args.result + "/domtblout.tsv"
# Starts out empty; it is filled by code beyond this excerpt.
documents = {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment