Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for kvg flatten nested wdl dirs functions #274

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions src/cromshell/utilities/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
import logging
import re

# import os
import shutil
import tempfile
from contextlib import nullcontext
Expand Down Expand Up @@ -119,9 +117,7 @@ def has_nested_dependencies(wdl_path: str or Path) -> bool:
return False


def get_flattened_filename(
tempdir: tempfile.TemporaryDirectory, wdl_path: str or Path
) -> Path:
def get_flattened_filename(tempdir: str, wdl_path: str or Path) -> Path:
"""Generate hyphen-separated path to use for flattened WDL file path.
For example:
tempdir: /path/2/tempdir/ and wdl_path: /dir/path/2/wdl.wdl
Expand All @@ -131,7 +127,7 @@ def get_flattened_filename(
p = Path(wdl_path)

return Path(
tempdir.name
tempdir
+ "/"
+ re.sub("^-", "", re.sub("/", "-", str(p.parent)))
+ "-"
Expand All @@ -140,27 +136,31 @@ def get_flattened_filename(


def flatten_nested_dependencies(
tempdir: tempfile.TemporaryDirectory, wdl_path: str or Path
tempdir: tempfile.TemporaryDirectory, wdl_path: str
) -> Path:
"""Flatten a WDL directory structure and rewrite imports accordingly.

Return string representing the filesystem location of the rewritten WDL.

tempdir: /path/2/tempdir/
wdl_path: /dir/path/2/wdl.wdl
returns: /path/2/tempdir/dir-path-2-wdl.wdl
"""

p = Path(wdl_path)
wdl_dir = p.parent

new_wdl_path = get_flattened_filename(tempdir, wdl_path)
new_wdl_path = get_flattened_filename(tempdir.name, wdl_path)

with open(wdl_path, "r") as rf, open(new_wdl_path, "w") as wf:
for line in rf:
if line.startswith("import"):
m = re.match(r'import "(.+)"', line)
imported_wdl_name = m.group(1)
imported_wdl_path = (Path(wdl_dir) / imported_wdl_name).absolute()
imported_wdl_path = (Path(wdl_dir) / imported_wdl_name).resolve()
import_line = re.sub(
imported_wdl_name,
Path(get_flattened_filename(tempdir, imported_wdl_path)).name,
Path(get_flattened_filename(tempdir.name, imported_wdl_path)).name,
line,
)

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def local_cromshell_config_json(local_hidden_cromshell_folder):

@pytest.fixture
def test_workflows_path():
    """Return the ``workflows/`` directory that sits next to this file.

    The path is built from this file's parent directory. Joining onto
    ``Path(__file__)`` directly would treat the module file itself as a
    directory and produce a path that cannot exist.
    """
    return Path(__file__).parent.joinpath("workflows/")


@pytest.fixture
Expand Down
93 changes: 93 additions & 0 deletions tests/unit/test_io_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import csv
import io
import os
import re
import shutil
import tempfile
from contextlib import redirect_stdout
from pathlib import Path
from tempfile import NamedTemporaryFile
from zipfile import ZipFile

import pytest
Expand Down Expand Up @@ -328,6 +331,96 @@ def test_update_all_workflow_database_tsv(
):
assert row[column_to_update] == update_value

@pytest.mark.parametrize(
    "wdl_content, expected_result",
    [
        ('import "other.wdl"', False),  # No nested import
        ('import "../nested/other.wdl"', True),  # Nested import
        ('import "nested/other.wdl"', False),  # Relative path, but not nested
        ("task my_task { command { echo 'Hello, World!' } }", False),  # No import
        (
            'import "../nested/other.wdl"\nimport "nested/another.wdl"',
            True,
        ),  # Multiple imports, one nested
    ],
)
def test_has_nested_dependencies(self, wdl_content, expected_result):
    """Check ``io_utils.has_nested_dependencies`` against sample WDL text.

    Each case writes ``wdl_content`` to a temporary file and asserts that the
    function returns ``expected_result`` for that file's path.
    """
    # delete=False keeps the file on disk after the handle is closed, so the
    # content is flushed and the function under test can re-open it by path.
    with NamedTemporaryFile(mode="w", delete=False) as temp_file:
        temp_file.write(wdl_content)

    wdl_path = Path(temp_file.name)

    try:
        result = io_utils.has_nested_dependencies(wdl_path)
        assert result == expected_result
    finally:
        # Remove the file even when the call or the assertion fails, so a
        # failing test does not leave stray temporary files behind.
        wdl_path.unlink()

@pytest.mark.parametrize(
    "wdl_path, flattened_wdl_file",
    [
        ("/dir/path/2/wdl.wdl", "dir-path-2-wdl.wdl"),
        ("/another/wdl.wdl", "another-wdl.wdl"),
    ],
)
def test_get_flattened_filename(self, wdl_path, flattened_wdl_file):
    """Check that a WDL path maps to the expected hyphen-joined file name.

    ``io_utils.get_flattened_filename`` is given a scratch directory and the
    WDL path; the result must be ``flattened_wdl_file`` inside that directory.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        expected = Path(scratch_dir) / flattened_wdl_file

        actual = io_utils.get_flattened_filename(scratch_dir, Path(wdl_path))

        assert actual == expected

# Define test cases using @pytest.mark.parametrize
@pytest.mark.parametrize(
    "wdl_path, expected_file_content",
    [
        (
            "wdl_with_imports/helloWorld_with_imports.wdl",
            ["-helloWorld.wdl", "-wdl_with_imports-hello_world_task.wdl"],
        ),
    ],
)
def test_flatten_nested_dependencies(
    self, wdl_path, expected_file_content, test_workflows_path
):
    """Check the flattened WDL's location and its rewritten import names.

    ``wdl_path`` is relative to the ``test_workflows_path`` fixture. Each
    entry of ``expected_file_content`` is a suffix; prefixed with the
    hyphen-joined path of the workflow's grandparent directory, it must
    appear in the text of the file that
    ``io_utils.flatten_nested_dependencies`` returns.
    """
    abs_wdl_path = test_workflows_path.joinpath(wdl_path)
    abs_wdl_path_str = str(abs_wdl_path.absolute())

    # The function under test takes the TemporaryDirectory object itself, so
    # keep a handle on it; entering it as a context manager guarantees the
    # directory is removed even when an assertion below fails.
    tempdir = tempfile.TemporaryDirectory()
    with tempdir:
        result_path = io_utils.flatten_nested_dependencies(
            tempdir=tempdir, wdl_path=abs_wdl_path_str
        )

        # The flattened file is named after the full WDL path with "/"
        # replaced by "-" and the leading hyphen dropped.
        expected_result_path = Path(tempdir.name).joinpath(
            re.sub("^-", "", re.sub("/", "-", str(abs_wdl_path)))
        )
        assert result_path == expected_result_path

        # Read the output and build the shared prefix once; neither changes
        # between loop iterations.
        flattened_text = result_path.read_text()
        flattened_prefix = re.sub(
            "^-", "", re.sub("/", "-", str(abs_wdl_path.parents[1]))
        )
        for expected_file_content_line in expected_file_content:
            parsed_line = flattened_prefix + expected_file_content_line
            assert parsed_line in flattened_text

@pytest.fixture
def mock_data_path(self):
    """Return the ``mock_data/`` directory located beside this test module."""
    module_dir = Path(__file__).parent
    return module_dir / "mock_data/"
Expand Down
42 changes: 42 additions & 0 deletions tests/workflows/wdl_with_imports/helloWorld_with_imports.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import "../helloWorld.wdl" as HelloWorldWorkflow
import "hello_world_task.wdl" as helloWorldTask

# Workflow that makes a single call to HelloWorldTask (through the
# helloWorldTask import alias) and exposes that task's output file.
# NOTE(review): nothing in this workflow body references the
# HelloWorldWorkflow import alias; presumably that parent-directory import
# exists only so tests can exercise import flattening - confirm.
workflow HelloWorld {
meta {
workflow_description: "echos hello world"
}
parameter_meta {
# Description of inputs:
# Required:
docker: "Docker image in which to run"
# Optional:
mem: "Amount of memory to give to the machine running each task in this workflow."
preemptible_attempts: "Number of times to allow each task in this workflow to be preempted."
disk_space_gb: "Amount of storage disk space (in Gb) to give to each machine running each task in this workflow."
cpu: "Number of CPU cores to give to each machine running each task in this workflow."
boot_disk_size_gb: "Amount of boot disk space (in Gb) to give to each machine running each task in this workflow."
}
# Required input.
String docker

# Optional inputs; each is passed unchanged to the task call below.
Int? mem
Int? preemptible_attempts
Int? disk_space_gb
Int? cpu
Int? boot_disk_size_gb

# Forward every workflow input to the task under the same name.
call helloWorldTask.HelloWorldTask {
input:
docker = docker,
mem = mem,
preemptible_attempts = preemptible_attempts,
disk_space_gb = disk_space_gb,
cpu = cpu,
boot_disk_size_gb = boot_disk_size_gb
}

# The workflow's only output is the file produced by the task call.
output {
File output_file = HelloWorldTask.output_file
}
}


57 changes: 57 additions & 0 deletions tests/workflows/wdl_with_imports/hello_world_task.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Task that echoes 'Hello World!' and returns the captured stdout as a file.
# The runtime section further down is commented out, so the values declared
# and computed here are only consumed if that section is re-enabled.
task HelloWorldTask {

# ------------------------------------------------
# Input args:

# Required:

# Runtime Options:
String docker
Int? mem
Int? preemptible_attempts
Int? disk_space_gb
Int? cpu
Int? boot_disk_size_gb

# ------------------------------------------------
# Process input args:

# ------------------------------------------------
# Get machine settings:
Boolean use_ssd = false

# You may have to change the following two parameter values depending on the task requirements
Int default_ram_mb = 3 * 1024
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
Int default_disk_space_gb = 100

Int default_boot_disk_size_gb = 15

# Mem is in units of GB but our command and memory runtime values are in MB
Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
# NOTE(review): command_mem is not referenced by the command block below.
Int command_mem = machine_mem - 1024

# ------------------------------------------------
# Run our command:
command <<<
set -e
echo 'Hello World!'
>>>

# ------------------------------------------------
# Runtime settings:
# NOTE(review): the whole runtime section is commented out; presumably this
# lets the fixture run without a docker image or cloud resources - confirm.
# runtime {
# docker: docker
# memory: machine_mem + " MB"
# disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
# bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
# preemptible: 0
# cpu: select_first([cpu, 1])
# }

# ------------------------------------------------
# Outputs:
# stdout() exposes the command's standard output as a File.
output {
File output_file = stdout()
}
}