From 7b5ee7b15a72caadeca636f2f85d661d3104bfac Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 13 Jun 2024 11:28:58 -0400 Subject: [PATCH] Wrap read_taxonomy from instructlab.utils This is a hack to get tests passing back in `instructlab/instructlab`. There is a test there that tries to mock read_taxonomy, but it's not working because of how we use it. This hack gives the test a place it can mock. The real issue here is that the split of code between instructlab and sdg is a mess, with dependencies in both directions, both in code and in the tests. There's a lot of work needed to clean this up, but I'm just trying to get to a functional starting point. Signed-off-by: Russell Bryant --- src/instructlab/sdg/generate_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/instructlab/sdg/generate_data.py b/src/instructlab/sdg/generate_data.py index 7fd41cd4..4bbd9ac5 100644 --- a/src/instructlab/sdg/generate_data.py +++ b/src/instructlab/sdg/generate_data.py @@ -20,11 +20,11 @@ chunk_document, max_seed_example_tokens, num_chars_from_tokens, - read_taxonomy, ) from jinja2 import Template from rouge_score import rouge_scorer import click +import instructlab.utils import tqdm # First Party @@ -358,6 +358,10 @@ def get_instructions_from_model( return instruction_data, discarded +def read_taxonomy(*args, **kwargs): + return instructlab.utils.read_taxonomy(*args, **kwargs) + + def generate_data( logger, api_base,