Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Block Name In Errors #155

Merged
merged 5 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,33 @@ def generate(self, dataset) -> Dataset:
dataset: the input dataset
"""
for block_prop in self.chained_blocks:
# Parse and instantiate the block
block_name = block_prop["name"]
block_type = _lookup_block_type(block_prop["type"])
block_config = block_prop["config"]
drop_columns = block_prop.get("drop_columns", [])
drop_duplicates_cols = block_prop.get("drop_duplicates", False)
block = block_type(self.ctx, self, block_name, **block_config)
gabe-l-hart marked this conversation as resolved.
Show resolved Hide resolved

logger.info("Running block: %s", block_name)
logger.info(dataset)

dataset = block.generate(dataset)
# Execute the block and wrap errors with the block name/type
try:
dataset = block.generate(dataset)

except Exception as err:
block_exc_err = (
gabe-l-hart marked this conversation as resolved.
Show resolved Hide resolved
f"BLOCK ERROR [{block_type.__name__}/{block_name}]: {err}"
)

# Try to raise the same exception type. This can fail if the
# exception is a non-standard type that has a different init
# signature, so fall back to raising a RuntimeError in that case.
try:
wrapper_err = type(err)(block_exc_err)
except TypeError:
wrapper_err = RuntimeError(block_exc_err)
raise wrapper_err from err

# If at any point we end up with an empty data set, the pipeline has failed
if len(dataset) == 0:
Expand Down
64 changes: 64 additions & 0 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
Unit tests for common Pipeline functionality
"""

# Standard
from unittest import mock

# Third Party
import pytest

# First Party
from instructlab.sdg.pipeline import Pipeline

## Helpers ##


class CustomTypeError(TypeError):
    """TypeError subclass that keeps the standard single-message constructor.

    Used as the failure raised by a mocked block to check that an exception
    type which accepts a single message argument can be re-created with a
    new message.
    """


class NoArgError(RuntimeError):
    """Exception that can't be instantiated with a single argument.

    The constructor takes no parameters besides ``self`` and always uses the
    fixed message ``"no args"``, so calling ``NoArgError("some message")``
    raises ``TypeError``.
    """

    def __init__(self):
        # Fixed message; callers cannot supply their own.
        super().__init__("no args")


@pytest.mark.parametrize(
    ["failure_exc", "exp_err_type"],
    [
        # Exception type that accepts a single message argument
        (CustomTypeError("Oh no!"), CustomTypeError),
        # Exception type that cannot be constructed from a single message
        (NoArgError(), RuntimeError),
    ],
)
def test_pipeline_named_errors_match_type(failure_exc, exp_err_type):
    """Validate that block types and names appear in the error message from a
    pipeline exception and that the type of the error is preserved.

    Args:
        failure_exc: the exception instance raised by the failing block's
            ``generate`` call.
        exp_err_type: the exact exception type expected to come out of
            ``Pipeline.generate``.
    """
    # A non-empty result so the first block's output has a non-zero length
    mock_dataset = ["not empty"]

    # Block "type" whose instances return the dataset from generate()
    working_block = mock.MagicMock()
    working_block().generate.return_value = mock_dataset

    # Block "type" whose instances raise the parametrized exception
    failure_block = mock.MagicMock()
    failure_block.__name__ = "BadBlock"
    failure_block().generate = mock.MagicMock(side_effect=failure_exc)

    pipe_cfg = [
        {"name": "I work", "type": "working", "config": {}},
        {"name": "I don't", "type": "failure", "config": {}},
    ]
    with mock.patch(
        "instructlab.sdg.pipeline._block_types",
        {
            "working": working_block,
            "failure": failure_block,
        },
    ):
        pipe = Pipeline(None, None, pipe_cfg)
        with pytest.raises(exp_err_type) as exc_ctx:
            pipe.generate(None)

        # pytest.raises also accepts subclasses of the expected type, so pin
        # the exact type here (isinstance would not be strict enough).
        assert type(exc_ctx.value) is exp_err_type
        # The original exception must be chained as the cause
        assert exc_ctx.value.__cause__ is failure_exc
        assert (
            str(exc_ctx.value)
            == f"BLOCK ERROR [{failure_block.__name__}/{pipe_cfg[1]['name']}]: {failure_exc}"
        )