Skip to content
Snippets Groups Projects
Commit 7a52f072 authored by Lukas Jelonek's avatar Lukas Jelonek
Browse files

Fix bug: hmmer and ghostx don't include sequences with no results

parent cda62f3e
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,7 @@ import sys ...@@ -4,6 +4,7 @@ import sys
import json import json
import argparse import argparse
from os import path from os import path
import glob
import subprocess import subprocess
parser = argparse.ArgumentParser(description='Convert ghostx results to json documents') parser = argparse.ArgumentParser(description='Convert ghostx results to json documents')
...@@ -55,6 +56,20 @@ with open(result_filename) as f: ...@@ -55,6 +56,20 @@ with open(result_filename) as f:
results.append(result) results.append(result)
# Add empty result entries for query sequences that produced no hits, so
# every query listed in the *.ids files appears in the output document.
queries = []
# Named ids_pattern rather than `path` so the `from os import path` import
# at the top of this script is not overwritten.
ids_pattern = "*.ids"
for filename in glob.glob(ids_pattern):
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line (e.g. a trailing newline):
                # there is no identifier to record, and indexing the empty
                # list would raise IndexError.
                continue
            # The identifier is the first whitespace-delimited token.
            query_id = fields[0]
            if line.startswith('>'):
                # FASTA-style header line: drop the leading '>' marker(s).
                query_id = query_id.lstrip('>')
            queries.append(query_id)
for query_id in queries:
    if query_id not in documents:
        # No results were recorded for this query: emit a placeholder
        # document with an empty result list for this tool.
        documents[query_id] = {"id": query_id, "computations": [{'tool': tool, 'results':[]}]}
output_filename = args.output output_filename = args.output
with open(output_filename, 'w') as o: with open(output_filename, 'w') as o:
json.dump(documents, o) json.dump(documents, o)
......
...@@ -12,12 +12,14 @@ args = parser.parse_args() ...@@ -12,12 +12,14 @@ args = parser.parse_args()
# Provide a list of all query sequence names for conversion process # Provide a list of all query sequence names for conversion process
queries = [] queries = []
path = "*_enum_headers.tsv" path = "*.ids"
for filename in glob.glob(path): for filename in glob.glob(path):
with open(filename) as f: with open(filename) as f:
for line in f: for line in f:
if line.startswith('>'): if line.startswith('>'):
queries.append(line.split()[0].strip().lstrip('>')) queries.append(line.split()[0].strip().lstrip('>'))
else:
queries.append(line.split()[0].strip())
filename = args.result + "/domtblout.tsv" filename = args.result + "/domtblout.tsv"
documents = {} documents = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment