Commit dad7e862 authored by Lukas Jelonek
Browse files

Add feature: dbxref lookup (closes #23)

Sem-Ver: feature
parent a0fdafcd
......@@ -24,6 +24,7 @@ def main():
analyze_parser.add_argument('--profile', '-p', default='fast', help='The profile to use')
analyze_parser.add_argument('--live', '-l', action='store_true', help='Report results as they are computed, not only at the end of the computation. The live results will be available in the $output/live.')
analyze_parser.add_argument('--config', '-c', help='The config to use')
analyze_parser.add_argument('--fetch_informations', '-i', action='store_true', help='Fetch informations')
analyze_parser.add_argument('--debug', '-d', action='store_true', help='Debug mode, computation directory will not be removed after computation')
analyze_parser.add_argument('--execution_dir', '-e', help='Use the specified execution directory and do not delete it after the computation')
analyze_parser.add_argument('--use_cluster', '-C', action='store_true', help='Use compute cluster for execution')
......@@ -59,6 +60,7 @@ def generate_execution(config, args):
execution = {}
execution['debug'] = args.debug
execution['use_cluster'] = args.use_cluster
execution['fetch_informations'] = args.fetch_informations
execution['mode'] = 'live' if args.live else 'complete'
execution['bin_path'] = config['app']['bin_path']
execution['script_path'] = config['app']['script_path']
......
......@@ -62,6 +62,20 @@ process convert_${id}_to_json {
"""
}
''')
convert_info_template = Template ('''
process convert_${id}_to_json {
input:
file result from ${id}_results
output:
file "$${result}.json" into ${id}_json_info
script:
"""
${converter_script} --result $$result --output $${result}.json ${converter_params}
"""
}
''')
convert_template = Template ('''
process convert_${id}_to_json {
input:
......@@ -76,18 +90,33 @@ process convert_${id}_to_json {
"""
}
''')
fetch_template = Template('''
process fetch_dbxrefs {
retrieve_informations_template = Template('''
process retrieve_informations_for_${id} {
input:
file result from ${id}_fetch_json
file result from ${id}_json_info
output:
file "$${result}_fetched.json" into ${id}_json
file "$${result.simpleName}_info.json" into ${id}_json
script:
"""
resolve_dbxrefs.py --input $$result --retreiver ../dbxref/scripts --output $${result}_fetched.json
resolve_dbxrefs.py --input $$result --output $${result.simpleName}_info.json
"""
}
''')
retrieve_informations_live_template = Template('''
process retrieve_informations_for_${id} {
input:
file result from ${id}_json_info
output:
file "$${result.simpleName}_info.json" into ${id}_json, ${id}_json_live
script:
"""
resolve_dbxrefs.py --input $$result --output $${result.simpleName}_info.json
"""
}
''')
......@@ -168,11 +197,20 @@ Channel.fromPath(params.fasta).set{fasta}''')
config['clusterOptions'] = ''
fragments.append(analysis_template.substitute(config))
if execution['mode'] == 'live':
if execution['mode'] == 'live' and not execution['fetch_informations']:
fragments.append(convert_live_template.substitute(flatten(m)))
copy = deepcopy(m)
copy['output'] = execution['output']
fragments.append(live_results_template.substitute(flatten(copy)))
elif execution['mode'] == 'live' and execution['fetch_informations']:
fragments.append(convert_info_template.substitute(flatten(m)))
fragments.append(retrieve_informations_live_template.substitute(flatten(m)))
copy = deepcopy(m)
copy['output'] = execution['output']
fragments.append(live_results_template.substitute(flatten(copy)))
elif execution['mode'] == 'complete' and execution['fetch_informations']:
fragments.append(convert_info_template.substitute(flatten(m)))
fragments.append(retrieve_informations_template.substitute(flatten(m)))
else:
fragments.append(convert_template.substitute(flatten(m)))
......
import unittest
import pkg_resources
from tempfile import mkdtemp
from shutil import rmtree
from subprocess import run
import os.path
import json
class TestFetchInformations(unittest.TestCase):
    """End-to-end tests for the ``--fetch_informations`` (``-i``) CLI flag.

    Runs the ``psot`` command-line tool on a bundled single-sequence fasta
    file and checks that a well-formed JSON result file is produced.
    """

    def test_that_psot_runs_with_fetch_informations(self):
        self.run_psot(['-i'])

    def test_that_psot_runs_with_fetch_informations_and_live_results(self):
        self.run_psot(['-i', '-l'])

    def run_psot(self, parameters):
        """Run ``psot analyze`` with the extra CLI ``parameters`` and assert
        that the expected JSON output file exists and contains an id plus a
        non-empty list of computations.
        """
        testdata = pkg_resources.resource_filename('tests', 'data/single.fas')
        result_directory = mkdtemp()
        # Register cleanup immediately so the temp directory is removed even
        # when one of the assertions below fails (the original trailing
        # rmtree() call leaked the directory on test failure).
        self.addCleanup(rmtree, result_directory, ignore_errors=True)
        execution = ["psot", 'analyze', '-f', testdata, '-o', result_directory, '-p', 'complete']
        execution.extend(parameters)
        proc = run(execution)
        # Fail fast with a clear message if the pipeline itself crashed,
        # instead of a confusing "file does not exist" assertion below.
        self.assertEqual(proc.returncode, 0)
        # first check if the result file exists
        expected_file = result_directory + '/seq_1.cds_10.json'
        self.assertTrue(os.path.exists(expected_file))
        # now check if it is a valid json and contains the id and a non-empty list of results
        with open(expected_file, 'r') as f:
            result = json.load(f)
        self.assertTrue('id' in result)
        self.assertTrue('computations' in result)
        self.assertGreater(len(result['computations']), 0)
......@@ -28,7 +28,5 @@ class TestPsot(unittest.TestCase):
self.assertTrue('id' in result)
self.assertTrue('computations' in result)
self.assertGreater(len(result['computations']), 0)
rmtree(result_directory)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment