Skip to content

Commit

Permalink
Merge pull request #77 from rabix/bugfix/list_of_types
Browse files Browse the repository at this point in the history
Bugfix/list of types
  • Loading branch information
pavlemarinkovic authored Oct 9, 2023
2 parents ac9b6a0 + 0580045 commit 44aefc2
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 84 deletions.
2 changes: 1 addition & 1 deletion sbpack/noncwl/copy.py → sbpack/noncwl/copy_app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
import logging
import sbpack.lib as lib
from sevenbridges.errors import NotFound
# from sevenbridges.errors import NotFound
from sbpack.noncwl.utils import install_or_upgrade_app

logger = logging.getLogger(__name__)
Expand Down
89 changes: 50 additions & 39 deletions sbpack/noncwl/nextflow.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
import ruamel.yaml
import json
import argparse
import logging
Expand All @@ -22,6 +21,7 @@
update_schema_code_package,
install_or_upgrade_app,
validate_inputs,
nf_schema_type_mapper,
GENERIC_FILE_ARRAY_INPUT,
GENERIC_OUTPUT_DIRECTORY,
WRAPPER_REQUIREMENTS,
Expand All @@ -32,7 +32,6 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024 # MB
NF_SCHEMA_DEFAULT_NAME = 'nextflow_schema.json'


Expand All @@ -50,30 +49,6 @@ def __init__(self, workflow_path, dump_schema=False, sb_doc=None):
self.output_schemas = None
self.input_schemas = None

@staticmethod
def nf_schema_type_mapper(input_type_string):
"""
Convert nextflow schema input type to CWL
"""
type_ = input_type_string.get('type', 'string')
format_ = input_type_string.get('format', '')
if type_ == 'string' and 'path' in format_:
if format_ == 'file-path':
return ['File']
if format_ == 'directory-path':
return ['Directory']
if format_ == 'path':
return ['File']
if type_ == 'string':
return ['string']
if type_ == 'integer':
return ['int']
if type_ == 'number':
return ['float']
if type_ == 'boolean':
return ['boolean']
return [type_]

@staticmethod
def nf_cwl_port_map():
"""
Expand All @@ -84,19 +59,43 @@ def nf_cwl_port_map():
'default': 'sbg:toolDefaultValue',
'description': 'label',
'help_text': 'doc',
'mimetype': 'format',
'fa_icon': 'sbg:icon',
'pattern': 'sbg:pattern',
'hidden': 'sbg:hidden',
}

@staticmethod
def nf_cwl_category_map():
"""
Mappings of nextflow definition fields to SB category fields
nextflow_key: cwl_key mapping
"""
return {
'title': 'sbg:title',
'description': 'sbg:doc',
'fa_icon': 'sbg:icon',
}

def nf_to_sb_input_mapper(self, port_id, port_data):
def nf_to_sb_input_mapper(self, port_id, port_data, category=None):
"""
Convert a single input from Nextflow schema to SB schema
"""
sb_input = dict()
sb_input['id'] = port_id
sb_input['type'] = self.nf_schema_type_mapper(port_data)
sb_input['type'] = nf_schema_type_mapper(port_data)
sb_input['type'].append('null')
if category:
sb_input['sbg:category'] = category
for nf_field, sb_field in self.nf_cwl_port_map().items():
if nf_field in port_data:
sb_input[sb_field] = port_data[nf_field]
value = port_data[nf_field]
if value == ":" and nf_field == 'default':
# Bug prevents running a task if an input's
# default value is exactly ":"
value = " :"
sb_input[sb_field] = value

sb_input['inputBinding'] = {
'prefix': f'--{port_id}',
}
Expand All @@ -108,18 +107,29 @@ def collect_nf_definition_properties(self, definition):
definition contains multiple properties
"""
cwl_inputs = list()
sb_category = dict()

for nf_field, sb_field in self.nf_cwl_category_map().items():
if nf_field in definition:
sb_category[sb_field] = definition[nf_field]

input_category = 'Inputs'
if 'title' in definition:
input_category = sb_category['sbg:title']

for port_id, port_data in definition['properties'].items():
cwl_inputs.append(self.nf_to_sb_input_mapper(
port_id,
port_data,
category=input_category,
))
# Nextflow schema field "required" lists input_ids
# for required inputs.
# Reason we are not using definition.get('required', []) any longer
# is that some inputs can be contained in the profile. This means
# that they do not have to be provided explicitly through the
# command line.
return cwl_inputs
return cwl_inputs, sb_category

def nf_schema_build(self):
"""
Expand All @@ -146,7 +156,7 @@ def nf_schema_build(self):
@staticmethod
def file_is_nf_schema(path):
try:
schema = ruamel.yaml.safe_load(path)
schema = yaml.safe_load(path)
if 'definitions' not in schema:
return False
if type(schema['definitions']) is not dict:
Expand Down Expand Up @@ -175,15 +185,17 @@ def generate_sb_inputs(self, manual_validation=False):

if self.nf_schema_path:
with open(self.nf_schema_path, 'r') as f:
nf_schema = ruamel.yaml.safe_load(f)
nf_schema = yaml.safe_load(f)

for p_key, p_value in nf_schema.get('properties', {}).items():
cwl_inputs.append(
self.nf_to_sb_input_mapper(p_key, p_value))
for def_name, definition in nf_schema.get(
'definitions', {}).items():
cwl_inputs.extend(
self.collect_nf_definition_properties(definition))
inputs, category = self.collect_nf_definition_properties(
definition)
cwl_inputs.extend(inputs)
# add category to schema

if self.input_schemas:
for file in self.input_schemas:
Expand Down Expand Up @@ -354,7 +366,7 @@ def parse_output_yml(self, yml_file):
:return: list of outputs in CWL format.
"""
outputs = list()
yml_schema = ruamel.yaml.safe_load(yml_file)
yml_schema = yaml.safe_load(yml_file)

for key, value in yml_schema.items():
# Tower yml file can use "tower" key in the yml file to designate
Expand Down Expand Up @@ -525,7 +537,7 @@ def main():
)
parser.add_argument(
"--revision-note", required=False,
default=None, type=str, nargs="+",
default=None, type=str,
help="Revision note to be placed in the CWL schema if the app is "
"uploaded to the sbg platform.",
)
Expand Down Expand Up @@ -608,10 +620,9 @@ def main():
revision_note = f"Uploaded using sbpack v{__version__}"

if args.revision_note:
revision_note = str(" ".join(args.revision_note))
revision_note = str(args.revision_note)

if not args.sb_schema:
sb_app["sbg:revisionNotes"] = revision_note
sb_app["sbg:revisionNotes"] = revision_note

install_or_upgrade_app(api, args.appid, sb_app)

Expand Down
122 changes: 97 additions & 25 deletions sbpack/noncwl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import json
import yaml
import re

from sbpack.pack import pack
from sevenbridges.errors import NotFound

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PACKAGE_SIZE_LIMIT = 100 * 1024 * 1024
PACKAGE_SIZE_LIMIT = 256 * 1024 * 1024 - 1 # ~256 MB

# A generic SB input array of files that should be available on the
# instance but are not explicitly provided to the execution as wdl params.
Expand Down Expand Up @@ -67,6 +69,45 @@ class EXTENSIONS:
all_ = [yaml, yml, json, cwl]


def nf_schema_type_mapper(input_type_string):
"""
Convert nextflow schema input type to CWL
"""
type_ = input_type_string.get('type', 'string')
format_ = input_type_string.get('format', '')

return type_mapper(type_, format_)


def type_mapper(type_, format_):
if isinstance(type_, str):
if type_ == 'string' and 'path' in format_:
if format_ == 'file-path':
return ['File']
if format_ == 'directory-path':
return ['Directory']
if format_ == 'path':
return ['File']
if type_ == 'string':
return ['string']
if type_ == 'integer':
return ['int']
if type_ == 'number':
return ['float']
if type_ == 'boolean':
return ['boolean']
if type_ == 'object':
# this should be a record type (dictionary)
# it is provided as '{"key1": "value1", "key2": "value2"}'
return ['string']
return [type_]
elif isinstance(type_, list):
temp_type_list = []
for m in type_:
temp_type_list.extend(type_mapper(m, format_))
return temp_type_list


def create_profile_enum(profiles: list):
"""
If profiles are defined in the config file, this input stores the profiles
Expand Down Expand Up @@ -151,12 +192,41 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name):
for packages on SevenBridges Platform. Delete local .zip file.
"""

basename = os.path.basename(os.path.abspath(workflow_path)) + '_' + \
time.strftime("%Y%m%d-%H%M%S")

zip_path = os.path.join(os.path.dirname(workflow_path), basename + '.zip')
shutil.make_archive(zip_path[:-4], 'zip', root_dir=workflow_path,
base_dir='./')
# This will create a temporary directory that will store all files from the
# original directory, except for the .git hidden directory. This dir
# sometimes collects a large amount of files that will not be used by the
# tool, and can increase the size of the archive up to 10 times.

source_path = os.path.abspath(workflow_path)
destination_path = source_path + '_' + time.strftime("%Y%m%d-%H%M%S")
zip_path = destination_path + '.zip'
os.mkdir(destination_path)

for root, dirs, files in os.walk(workflow_path):
pattern = re.compile(r'(?:^|.*/)\.git(?:$|/.*)')
if re.match(pattern, root):
continue

dirs = [d for d in dirs if not re.match(pattern, d)]
for d in dirs:
source_file = os.path.join(root, d)
directory_path = os.path.join(destination_path, os.path.relpath(
source_file, workflow_path))
if not os.path.exists(directory_path):
os.mkdir(directory_path)

for file in files:
source_file = os.path.join(root, file)
dest_file = os.path.join(destination_path, os.path.relpath(
source_file, workflow_path))
shutil.copy2(source_file, dest_file)

shutil.make_archive(
destination_path,
'zip',
root_dir=destination_path,
base_dir='./'
)

if os.path.getsize(zip_path) > PACKAGE_SIZE_LIMIT:
logger.error(f"File size too big: {os.path.getsize(zip_path)}")
Expand Down Expand Up @@ -184,7 +254,10 @@ def zip_and_push_to_sb(api, workflow_path, project_id, folder_name):
print(f'Upload complete!')

os.remove(zip_path)
print(f'Local file {zip_path} deleted.')
print(f'Temporary local file {zip_path} deleted.')

shutil.rmtree(destination_path)
print(f'Temporary local folder {destination_path} deleted.')

return uploaded_file_id

Expand Down Expand Up @@ -249,10 +322,11 @@ def parse_config_file(file_path):
with open(file_path, 'r') as file:
config = file.read()

trace_pattern = re.compile(r"trace\s\{.*}", re.MULTILINE | re.DOTALL)
if re.findall(trace_pattern, config):
logger.warning("Detected `trace` in nextflow config. This "
"functionality is currently not supported.")
# Trace
# trace_pattern = re.compile(r"trace\s\{.*}", re.MULTILINE | re.DOTALL)
# if re.findall(trace_pattern, config):
# logger.warning("Detected `trace` in nextflow config. This "
# "functionality is currently not supported.")
found_profiles = False
brackets = 0

Expand All @@ -270,10 +344,12 @@ def parse_config_file(file_path):

# Extract profiles using regex
profiles = {}
pattern = re.compile(r'^\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL)
pattern = re.compile(r'\s*(\w+)\s*{([^}]+)}', re.MULTILINE | re.DOTALL)
blocks = re.findall(pattern, profiles_text)
for name, content in blocks:
settings = dict(re.findall(r'\s*([a-zA-Z.]+)\s*=\s*(.*)', content))
settings = dict(
re.findall(r'([a-zA-Z._]+)(?:\s+|)=(?:\s+|)([^\s]+)', content)
)
profiles[name] = settings
include_path = re.findall(
r'includeConfig\s+[\'\"]([a-zA-Z_.\\/]+)[\'\"]', content)
Expand All @@ -289,23 +365,19 @@ def update_schema_code_package(sb_schema, schema_ext, new_code_package):
"""
Update the package in the sb_schema
"""

sb_schema_dict = pack(sb_schema)
sb_schema_dict['app_content']['code_package'] = new_code_package

if schema_ext.lower() == EXTENSIONS.json:
with open(sb_schema, 'r') as file:
sb_schema_json = json.load(file)
sb_schema_json['app_content']['code_package'] = new_code_package
with open(sb_schema, 'w') as file:
json.dump(sb_schema_json, file)

return sb_schema_json
json.dump(sb_schema_dict, file)

elif schema_ext.lower() in EXTENSIONS.yaml_all:
with open(sb_schema, 'r') as file:
sb_schema_yaml = yaml.safe_load(file)
sb_schema_yaml['app_content']['code_package'] = new_code_package
with open(sb_schema, 'w') as file:
yaml.dump(sb_schema_yaml, file)
yaml.dump(sb_schema_dict, file)

return sb_schema_yaml
return sb_schema_dict


def install_or_upgrade_app(api, app_id, sb_app_raw):
Expand Down
2 changes: 1 addition & 1 deletion sbpack/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2023.06.20rc3"
__version__ = "2023.06.20rc4"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
'sbpull = sbpack.unpack:main',
'sbpack_nf = sbpack.noncwl.nextflow:main',
'sbpack_wdl = sbpack.noncwl.wdl:main',
'sbcopy = sbpack.noncwl.copy:main',
'sbcopy = sbpack.noncwl.copy_app:main',
],
},

Expand Down
Loading

0 comments on commit 44aefc2

Please sign in to comment.