Skip to content

Commit

Permalink
Interpret llmblock.config_path relative to the pipeline config path
Browse files Browse the repository at this point in the history
Given --pipeline=/some/random/dir/for/pipelines, it doesn't make sense for
config_path to be relative to /some/random/dir/ - the obvious expectation
is for it to be relative to /some/random/dir/for/pipelines.

This means that pipeline config now looks like this:

```
  - name: gen_questions
    type: LLMBlock
    config:
      config_path: ../../configs/skills/freeform_questions.yaml
```

Signed-off-by: Mark McLoughlin <[email protected]>
  • Loading branch information
markmc committed Jul 13, 2024
1 parent 88f5003 commit 804ee3a
Show file tree
Hide file tree
Showing 14 changed files with 60 additions and 37 deletions.
7 changes: 5 additions & 2 deletions src/instructlab/sdg/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@


class Block(ABC):
def __init__(self, ctx, block_name: str) -> None:
def __init__(self, ctx, pipe, block_name: str) -> None:
self.ctx = ctx
self.pipe = pipe
self.block_name = block_name

@staticmethod
Expand Down Expand Up @@ -50,6 +51,8 @@ def _load_config(self, config_path: str) -> Union[Dict[str, Any], None]:
:return: The loaded configuration.
"""
if not os.path.isabs(config_path):
config_path = os.path.join(self.ctx.sdg_base, config_path)
config_path = os.path.join(
os.path.dirname(self.pipe.config_path), config_path
)
with open(config_path, "r", encoding="utf-8") as config_file:
return yaml.safe_load(config_file)
4 changes: 3 additions & 1 deletion src/instructlab/sdg/filterblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class FilterByValueBlock(Block):
def __init__(
self,
ctx,
pipe,
block_name,
filter_column,
filter_value,
Expand All @@ -88,6 +89,7 @@ def __init__(
Parameters:
- ctx (PipelineContext): A PipelineContext object containing runtime parameters.
- pipe (Pipeline): The Pipeline containing this block in its chain.
- block_name (str): An identifier for this block.
- filter_column (str): The name of the column in the dataset to apply the filter on.
- filter_value (any or list of any): The value(s) to filter by.
Expand Down Expand Up @@ -147,7 +149,7 @@ def __init__(
- This block will filter the dataset to only include rows where the
"full_name" column contains the substring "John" or "Jane".
"""
super().__init__(ctx, block_name)
super().__init__(ctx, pipe, block_name)
self.value = filter_value if isinstance(filter_value, list) else [filter_value]
self.column_name = filter_column
self.operation = _get_operator_func(operation)
Expand Down
4 changes: 3 additions & 1 deletion src/instructlab/sdg/importblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class ImportBlock(Block):
def __init__(
self,
ctx,
pipe,
block_name,
path,
) -> None:
Expand All @@ -22,10 +23,11 @@ def __init__(
Parameters:
- ctx (PipelineContext): A PipelineContext object containing runtime parameters.
- pipe (Pipeline): The Pipeline containing this block in its chain.
- block_name (str): An identifier for this block.
- path (str): A path (absolute, or relative to the instructlab.sdg package) to a pipeline config file.
"""
super().__init__(ctx, block_name)
super().__init__(ctx, pipe, block_name)
self.path = path
self.pipeline = pipeline.Pipeline.from_file(self.ctx, self.path)

Expand Down
5 changes: 4 additions & 1 deletion src/instructlab/sdg/llmblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,14 @@ class LLMBlock(Block):
def __init__(
self,
ctx,
pipe,
block_name,
config_path,
output_cols,
parser_kwargs={},
batch_kwargs={},
) -> None:
super().__init__(ctx, block_name)
super().__init__(ctx, pipe, block_name)
self.block_config = self._load_config(config_path)
self.prompt_struct = (
"""{system}\n{introduction}\n{principles}\n{examples}\n{generation}"""
Expand Down Expand Up @@ -215,6 +216,7 @@ class ConditionalLLMBlock(LLMBlock):
def __init__(
self,
ctx,
pipe,
block_name,
config_paths,
output_cols,
Expand All @@ -224,6 +226,7 @@ def __init__(
) -> None:
super().__init__(
ctx,
pipe,
block_name,
config_paths[0][0],
output_cols,
Expand Down
11 changes: 6 additions & 5 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,28 @@ def __init__(
self.model_family = model_family
self.model_id = model_id
self.num_instructions_to_generate = num_instructions_to_generate
self.sdg_base = resources.files(__package__)
# FIXME: base this on the available number of CPUs
self.num_procs = 8


class Pipeline:
def __init__(self, ctx, chained_blocks: list) -> None:
def __init__(self, ctx, config_path, chained_blocks: list) -> None:
"""
Initialize the Pipeline class with a configuration dictionary.
config_dict: the run config py or yaml loaded into a dictionary
"""
# ctx is a PipelineContext object that supplies context configuration to every block
self.ctx = ctx
# config_path is the path of the pipeline config file used to create this pipeline
self.config_path = config_path
# pipeline config is the run configuration that consists of the pipeline steps
self.chained_blocks = chained_blocks

@classmethod
def from_file(cls, ctx, pipeline_yaml):
if not os.path.isabs(pipeline_yaml):
pipeline_yaml = os.path.join(ctx.sdg_base, pipeline_yaml)
return cls(ctx, _parse_pipeline_config_file(pipeline_yaml))
pipeline_yaml = os.path.join(resources.files(__package__), pipeline_yaml)
return cls(ctx, pipeline_yaml, _parse_pipeline_config_file(pipeline_yaml))

def _drop_duplicates(self, dataset, cols):
"""
Expand All @@ -64,7 +65,7 @@ def generate(self, dataset) -> Dataset:
drop_columns = block_prop.get("drop_columns", [])
gen_kwargs = block_prop.get("gen_kwargs", {})
drop_duplicates_cols = block_prop.get("drop_duplicates", False)
block = block_type(self.ctx, block_name, **block_config)
block = block_type(self.ctx, self, block_name, **block_config)

logger.info("Running block: %s", block_name)
logger.info(dataset)
Expand Down
8 changes: 4 additions & 4 deletions src/instructlab/sdg/pipelines/full/freeform_skills.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_questions
type: LLMBlock
config:
config_path: configs/skills/freeform_questions.yaml
config_path: ../../configs/skills/freeform_questions.yaml
output_cols:
- question
batch_kwargs:
Expand All @@ -13,7 +13,7 @@ blocks:
- name: eval_questions
type: LLMBlock
config:
config_path: configs/skills/evaluate_freeform_questions.yaml
config_path: ../../configs/skills/evaluate_freeform_questions.yaml
output_cols:
- evaluation
- score
Expand All @@ -31,13 +31,13 @@ blocks:
- name: gen_responses
type: LLMBlock
config:
config_path: configs/skills/freeform_responses.yaml
config_path: ../../configs/skills/freeform_responses.yaml
output_cols:
- response
- name: evaluate_qa_pair
type: LLMBlock
config:
config_path: configs/skills/evaluate_freeform_pair.yaml
config_path: ../../configs/skills/evaluate_freeform_pair.yaml
output_cols:
- evaluation
- score
Expand Down
10 changes: 5 additions & 5 deletions src/instructlab/sdg/pipelines/full/grounded_skills.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_contexts
type: LLMBlock
config:
config_path: configs/skills/contexts.yaml
config_path: ../../configs/skills/contexts.yaml
output_cols:
- context
gen_kwargs:
Expand All @@ -15,7 +15,7 @@ blocks:
- name: gen_grounded_questions
type: LLMBlock
config:
config_path: configs/skills/grounded_questions.yaml
config_path: ../../configs/skills/grounded_questions.yaml
output_cols:
- question
batch_kwargs:
Expand All @@ -25,7 +25,7 @@ blocks:
- name: eval_grounded_questions
type: LLMBlock
config:
config_path: configs/skills/evaluate_grounded_questions.yaml
config_path: ../../configs/skills/evaluate_grounded_questions.yaml
output_cols:
- evaluation
- score
Expand All @@ -43,13 +43,13 @@ blocks:
- name: gen_grounded_responses
type: LLMBlock
config:
config_path: configs/skills/grounded_responses.yaml
config_path: ../../configs/skills/grounded_responses.yaml
output_cols:
- response
- name: evaluate_grounded_qa_pair
type: LLMBlock
config:
config_path: configs/skills/evaluate_grounded_pair.yaml
config_path: ../../configs/skills/evaluate_grounded_pair.yaml
output_cols:
- evaluation
- score
Expand Down
10 changes: 5 additions & 5 deletions src/instructlab/sdg/pipelines/full/knowledge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_mmlu_knowledge
type: LLMBlock
config:
config_path: configs/knowledge/mcq_generation.yaml
config_path: ../../configs/knowledge/mcq_generation.yaml
output_cols:
- mmlubench_question
- mmlubench_answer
Expand All @@ -15,7 +15,7 @@ blocks:
- name: gen_knowledge
type: LLMBlock
config:
config_path: configs/knowledge/generate_questions_responses.yaml
config_path: ../../configs/knowledge/generate_questions_responses.yaml
output_cols:
- question
- response
Expand All @@ -31,7 +31,7 @@ blocks:
- name: eval_faithfulness_qa_pair
type: LLMBlock
config:
config_path: configs/knowledge/evaluate_faithfulness.yaml
config_path: ../../configs/knowledge/evaluate_faithfulness.yaml
output_cols:
- explanation
- judgment
Expand All @@ -49,7 +49,7 @@ blocks:
- name: eval_relevancy_qa_pair
type: LLMBlock
config:
config_path: configs/knowledge/evaluate_relevancy.yaml
config_path: ../../configs/knowledge/evaluate_relevancy.yaml
output_cols:
- feedback
- score
Expand All @@ -68,7 +68,7 @@ blocks:
- name: eval_verify_question
type: LLMBlock
config:
config_path: configs/knowledge/evaluate_question.yaml
config_path: ../../configs/knowledge/evaluate_question.yaml
output_cols:
- explanation
- rating
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/pipelines/simple/freeform_skills.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_skill_freeform
type: LLMBlock
config:
config_path: configs/skills/simple_generate_qa_freeform.yaml
config_path: ../../configs/skills/simple_generate_qa_freeform.yaml
output_cols:
- output
gen_kwargs:
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/pipelines/simple/grounded_skills.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_skill_grounded
type: LLMBlock
config:
config_path: configs/skills/simple_generate_qa_grounded.yaml
config_path: ../../configs/skills/simple_generate_qa_grounded.yaml
output_cols:
- output
gen_kwargs:
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/sdg/pipelines/simple/knowledge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ blocks:
- name: gen_knowledge
type: LLMBlock
config:
config_path: configs/knowledge/simple_generate_qa.yaml
config_path: ../../configs/knowledge/simple_generate_qa.yaml
output_cols:
- output
gen_kwargs:
Expand Down
18 changes: 12 additions & 6 deletions src/instructlab/sdg/utilblocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@


class SamplePopulatorBlock(Block):
def __init__(self, ctx, block_name, config_paths, column_name, post_fix="") -> None:
super().__init__(ctx, block_name)
def __init__(
self, ctx, pipe, block_name, config_paths, column_name, post_fix=""
) -> None:
super().__init__(ctx, pipe, block_name)
self.configs = {}
for config in config_paths:
if post_fix:
Expand All @@ -37,8 +39,10 @@ def generate(self, samples) -> Dataset:


class SelectorBlock(Block):
def __init__(self, ctx, block_name, choice_map, choice_col, output_col) -> None:
super().__init__(ctx, block_name)
def __init__(
self, ctx, pipe, block_name, choice_map, choice_col, output_col
) -> None:
super().__init__(ctx, pipe, block_name)
self.choice_map = choice_map
self.choice_col = choice_col
self.output_col = output_col
Expand All @@ -63,8 +67,10 @@ def generate(self, samples: Dataset) -> Dataset:


class CombineColumnsBlock(Block):
def __init__(self, ctx, block_name, columns, output_col, separator="\n\n") -> None:
super().__init__(ctx, block_name)
def __init__(
self, ctx, pipe, block_name, columns, output_col, separator="\n\n"
) -> None:
super().__init__(ctx, pipe, block_name)
self.columns = columns
self.output_col = output_col
self.separator = separator
Expand Down
11 changes: 8 additions & 3 deletions tests/test_filterblock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Standard
from unittest.mock import patch
from unittest.mock import MagicMock, patch
import operator
import unittest

Expand All @@ -13,16 +13,21 @@

class TestFilterByValueBlock(unittest.TestCase):
def setUp(self):
self.ctx = MagicMock()
self.ctx.num_procs = 1
self.pipe = MagicMock()
self.block = FilterByValueBlock(
PipelineContext(None, None, None, None),
self.ctx,
self.pipe,
"filter_by_age",
filter_column="age",
filter_value="30",
operation="eq",
convert_dtype="int",
)
self.block_with_list = FilterByValueBlock(
PipelineContext(None, None, None, None),
self.ctx,
self.pipe,
"filter_by_age_list",
filter_column="age",
filter_value=["30", "35"],
Expand Down
3 changes: 2 additions & 1 deletion tests/test_importblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ class TestImportBlockWithMockPipeline(unittest.TestCase):
@patch("instructlab.sdg.pipeline.Pipeline")
def setUp(self, mock_pipeline):
self.ctx = MagicMock()
self.pipe = MagicMock()
self.block_name = "test_block"
self.path = "/path/to/config"
self.mock_pipeline = mock_pipeline
self.import_block = ImportBlock(self.ctx, self.block_name, self.path)
self.import_block = ImportBlock(self.ctx, self.pipe, self.block_name, self.path)
self.dataset = Dataset.from_dict({})

def test_initialization(self):
Expand Down

0 comments on commit 804ee3a

Please sign in to comment.