Commit 5b3d5301 authored by Lukas Jelonek
Browse files

Add step that includes the sequence

parent ab24c60f
#!/usr/bin/python3
import sys
import json
import argparse
def parse_fasta(lines):
    """Build the per-sequence documents from the lines of a fasta file.

    Every header line (starting with ``>``) opens a new record whose id is
    the first whitespace-delimited token of the header without the leading
    ``>``. All following lines up to the next header are stripped and
    concatenated into the record's sequence.

    Args:
        lines: An iterable of text lines, e.g. an open file object.

    Returns:
        A dict mapping each id to ``{'id': <id>, 'sequence': <sequence>}``.

    Raises:
        ValueError: If non-blank sequence data appears before the first
            header line.
    """
    documents = {}
    seq_id = None
    chunks = []
    for line in lines:
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if seq_id is not None:
                documents[seq_id] = {'id': seq_id, 'sequence': ''.join(chunks)}
            seq_id = line.split()[0][1:]
            chunks = []
        elif seq_id is not None:
            chunks.append(line.strip())
        elif line.strip():
            raise ValueError(
                'Sequence data found before the first fasta header: '
                + line.strip()
            )
    # The final record has no following header, so it must be stored here;
    # otherwise the last sequence of every file would be lost.
    if seq_id is not None:
        documents[seq_id] = {'id': seq_id, 'sequence': ''.join(chunks)}
    return documents


def main():
    """Read the fasta file given by --result and write it as json to --output."""
    parser = argparse.ArgumentParser(description='Convert a fasta file into a json document')
    parser.add_argument('--result', '-r', required=True, help='The fasta file result directory')
    parser.add_argument('--output', '-o', required=True, help='The json file')
    args = parser.parse_args()

    with open(args.result) as f:
        documents = parse_fasta(f)

    output_filename = args.output
    with open(output_filename, 'w') as o:
        json.dump(documents, o)


if __name__ == '__main__':
    main()
......@@ -8,6 +8,15 @@ parser.add_argument('jsons', metavar='N', nargs='+', help='json documents')
parser.add_argument('--output', '-o', required=True, help='The name of the output document')
args = parser.parse_args()
def extend(m, o):
    """Merge the entries of ``o`` into ``m`` in place.

    Keys missing from ``m`` are taken over from ``o``. For keys present in
    both, a list value in ``o`` is appended to the value already in ``m``;
    any other value in ``o`` is ignored, so the value in ``m`` is kept.

    Args:
        m: The dict that is updated.
        o: The dict whose entries are merged into ``m``; it is not modified.

    Raises:
        AttributeError: If a key exists in both, the value in ``o`` is a
            list and the value in ``m`` has no ``extend`` method.
    """
    for key, value in o.items():
        if key not in m:
            # Copy lists so that later merges into m never modify o's list.
            m[key] = list(value) if isinstance(value, list) else value
        elif isinstance(value, list):
            m[key].extend(value)
joined = {}
for file in args.jsons:
with open(file) as f:
......@@ -15,7 +24,7 @@ for file in args.jsons:
for k in doc:
if not k in joined:
joined[k] = {'id': k, 'computations':[]}
joined[k]['computations'].extend(doc[k]['computations'])
extend(joined[k], doc[k])
output_filename = args.output
with open(output_filename, 'w') as o:
......
#!/usr/bin/env python3
import argparse
import config
import re
from os import system,makedirs
import subprocess
import json
# Command line interface: the input fasta file and the location it is copied to.
parser = argparse.ArgumentParser(description='Script that returns the fasta as it is inserted. Can be used for modules that have no actual analysis.')
parser.add_argument('--fasta', '-f', required=True, help='A fasta file with aminoacid sequences')
parser.add_argument('--output', '-o', required=True, help='The result directory. Will contain info.json and results.tsv.')
args = parser.parse_args()

# Copy the input unchanged. The command is passed as an argument list rather
# than a concatenated shell string, so paths containing spaces or shell
# metacharacters are handled correctly; '--' keeps paths that start with '-'
# from being read as cp options.
completed = subprocess.run(['cp', '--', args.fasta, args.output])
# Report a failed copy through the exit status instead of ignoring it.
if completed.returncode != 0:
    raise SystemExit(completed.returncode)
......@@ -12,6 +12,7 @@ profiles:
- name: 'common'
info: 'Profile that uses commonly used tools'
modules:
include_sequence:
signalp:
blastp_swissprot:
hmmer_pfam_a:
......
# Module manifest for the fasta sequence inclusion
# The name of the module. It is needed for the list-analyses option, for
# custom configurations and for custom profiles.
name: 'include_sequence'
# Short description of the analysis
info: 'includes the query sequence in the json document'
# The configuration of the script for the analysis step.
analysis:
script: 'run_identity.py'
parameters:
# The configuration of the script for the json conversion step.
converter:
script: 'convert_fasta.py'
parameters:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment