From 7b5ee7b15a72caadeca636f2f85d661d3104bfac Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Thu, 13 Jun 2024 11:28:58 -0400
Subject: [PATCH] Wrap read_taxonomy from instructlab.utils

This is a hack to get tests passing back in `instructlab/instructlab`.
There is a test there that tries to mock read_taxonomy, but it's not
working because of how we use it. This hack gives a place the test can
mock.

The real issue here is that the split of code between instructlab and
sdg is a mess, and there are dependencies in both directions, both in
code and in the tests. There's a lot of work to clean this up, but I'm
just trying to get to a functional starting point.

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 src/instructlab/sdg/generate_data.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/instructlab/sdg/generate_data.py b/src/instructlab/sdg/generate_data.py
index 7fd41cd4..4bbd9ac5 100644
--- a/src/instructlab/sdg/generate_data.py
+++ b/src/instructlab/sdg/generate_data.py
@@ -20,11 +20,11 @@
     chunk_document,
     max_seed_example_tokens,
     num_chars_from_tokens,
-    read_taxonomy,
 )
 from jinja2 import Template
 from rouge_score import rouge_scorer
 import click
+import instructlab.utils
 import tqdm
 
 # First Party
@@ -358,6 +358,10 @@ def get_instructions_from_model(
     return instruction_data, discarded
 
 
+def read_taxonomy(*args, **kwargs):
+    return instructlab.utils.read_taxonomy(*args, **kwargs)
+
+
 def generate_data(
     logger,
     api_base,