Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SOaAS
psot.repository
Commits
6f1bf564
Commit
6f1bf564
authored
Dec 06, 2017
by
hmueller
Browse files
Include fasta header conversion and restoration steps in ghostx module scripts.
parent
05fd6a68
Changes
4
Hide whitespace changes
Inline
Side-by-side
scripts/convert_ghostx.py
View file @
6f1bf564
#!/usr/bin/python3
import
sys
import
json
import
argparse
from
os
import
path
import
subprocess
parser
=
argparse
.
ArgumentParser
(
description
=
'Convert ghostx results to json documents'
)
parser
.
add_argument
(
'--result'
,
'-r'
,
required
=
True
,
help
=
'The ghostx result directory'
)
...
...
@@ -55,3 +58,6 @@ with open(result_filename) as f:
output_filename
=
args
.
output
with
open
(
output_filename
,
'w'
)
as
o
:
json
.
dump
(
documents
,
o
)
restore_seq_ids_tool
=
path
.
dirname
(
__file__
)
+
'/restore_seq_id_from_enumeration.py'
subprocess
.
run
([
restore_seq_ids_tool
,
'-j'
,
output_filename
,
'-e'
,
args
.
result
+
'/enum_headers.tsv'
])
scripts/reduce_fasta_headers_to_enumeration.py
View file @
6f1bf564
...
...
@@ -2,9 +2,11 @@
import
argparse
import
fileinput
import
os
parser
=
argparse
.
ArgumentParser
(
description
=
'Replaces fasta headers with unique numbers and s
tores original headers
in tsv f
ile
'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Replaces fasta headers with unique numbers and s
aves both
in tsv f
ormat
'
)
parser
.
add_argument
(
'--fasta'
,
'-f'
,
required
=
True
,
help
=
'The fasta file'
)
parser
.
add_argument
(
'--enum-headers-dir'
,
'-d'
,
default
=
'.'
,
help
=
'Target directory for headers file enum_headers.tsv'
)
args
=
parser
.
parse_args
()
fasta
=
args
.
fasta
...
...
@@ -21,8 +23,7 @@ with fileinput.FileInput(fasta, inplace=True) as f:
else
:
print
(
line
,
end
=
''
)
headers_dict_file
=
'enum_header_dict.tsv'
with
open
(
headers_dict_file
,
'w'
)
as
o
:
enum_headers_file
=
args
.
enum_headers_dir
+
'/enum_headers.tsv'
with
open
(
enum_headers_file
,
'w'
)
as
o
:
for
key
in
headers_dict
:
o
.
write
(
"{}
\t
{}
\n
"
.
format
(
key
,
headers_dict
[
key
]))
scripts/restore_seq_id_from_enumeration.py
View file @
6f1bf564
...
...
@@ -8,18 +8,24 @@ parser.add_argument('--json', '-j', required=True, help='The results json file')
parser
.
add_argument
(
'--enum-headers'
,
'-e'
,
required
=
True
,
help
=
'The enumerated original headers in tsv format'
)
args
=
parser
.
parse_args
()
header
_dict
=
{}
doc
uments
=
{}
seq_id
_dict
=
{}
doc
s_enumerated
=
{}
with
open
(
args
.
json
)
as
j
:
doc
uments
=
json
.
load
(
j
)
doc
s_enumerated
=
json
.
load
(
j
)
with
open
(
args
.
enum_headers
)
as
h
:
for
line
in
h
:
num
,
header
=
line
.
strip
().
split
(
'
\t
'
)
header
_dict
[
num
]
=
header
seq_id
_dict
[
num
]
=
header
.
split
()[
0
]
documents
[
"id"
]
=
header_dict
[
documents
[
"id"
]].
split
()[
0
]
documents_restored
=
{}
for
num
in
docs_enumerated
:
seq_id
=
seq_id_dict
[
num
]
doc
=
docs_enumerated
[
num
]
doc
[
"id"
]
=
seq_id
documents_restored
[
seq_id
]
=
doc
with
open
(
args
.
json
,
'w'
)
as
o
:
json
.
dump
(
documents
,
o
)
json
.
dump
(
documents
_restored
,
o
)
scripts/run_ghostx.py
View file @
6f1bf564
#!/usr/bin/env python3
import
env
import
argparse
import
re
from
os
import
system
,
makedirs
from
os
import
system
,
makedirs
,
path
from
psot
import
config
import
subprocess
import
json
...
...
@@ -15,6 +16,11 @@ parser.add_argument('--database', '-d', required=True, help='Database to search
parser
.
add_argument
(
'--output'
,
'-o'
,
required
=
True
,
help
=
'The result directory. Will contain info.json and results.tsv.'
)
args
=
parser
.
parse_args
()
makedirs
(
args
.
output
,
exist_ok
=
True
)
reduce_headers_tool
=
path
.
dirname
(
__file__
)
+
'/reduce_fasta_headers_to_enumeration.py'
subprocess
.
run
([
reduce_headers_tool
,
"-f"
,
args
.
fasta
,
"-d"
,
args
.
output
])
# Approach:
# directory for output
# info.json -> Tool info
...
...
@@ -26,11 +32,10 @@ toolconfig = {
}
# find version
output
=
subprocess
.
run
([
ghostx_tool
],
stderr
=
subprocess
.
PIPE
)
text
=
output
.
stderr
.
decode
(
'ascii'
)
text
=
output
.
stderr
.
decode
(
'ascii'
)
result
=
re
.
search
(
'version (.*)'
,
text
)
toolconfig
[
'version'
]
=
result
.
group
(
1
)
makedirs
(
args
.
output
,
exist_ok
=
True
)
with
open
(
args
.
output
+
'/info.json'
,
'w'
)
as
f
:
json
.
dump
(
toolconfig
,
f
)
system
(
ghostx_tool
+
" aln -d "
+
args
.
database
+
" -o "
+
args
.
output
+
"/results.tsv -i "
+
args
.
fasta
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment