diff --git a/src/instructlab/sdg/llmblock.py b/src/instructlab/sdg/llmblock.py index bba5403c..d7832ad2 100644 --- a/src/instructlab/sdg/llmblock.py +++ b/src/instructlab/sdg/llmblock.py @@ -8,6 +8,7 @@ # Local from .block import Block from .logger_config import setup_logger +from typing import Any, Dict, Union logger = setup_logger(__name__) @@ -56,9 +57,8 @@ def _parse(self, generated_string) -> dict: pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag) all_matches = re.findall(pattern, generated_string, re.DOTALL) matches[output_col] = ( - [match.strip() for match in all_matches] if all_matches else None + [match.strip() for match in all_matches] if all_matches else [] ) - return matches def _generate(self, samples, **gen_kwargs) -> list: @@ -123,7 +123,7 @@ def __init__(self, block_name, config_paths, client, model_id, output_cols, sele def _parse(self, generated_string): if self.parser_name == 'default': - return matches + return super()._parse(generated_string) elif self.parser_name == 'multi-line-logical-section': return {self.output_cols[0]: self.extract_multiline_logical_section(generated_string)}