From 3147c7590ca226b4e5fc3b1362046f324c92d34e Mon Sep 17 00:00:00 2001 From: "Brad P. Crochet" Date: Thu, 1 Aug 2024 09:37:39 -0400 Subject: [PATCH] fix: mt_bench_branch should ignore knowledge in generate The mt_bench_branch currently only supports pre-v3 yaml, and knowledge is currently at v3. But knowledge should not even be included in the eval run. This ignores the knowledge directory in order to skip it. Signed-off-by: Brad P. Crochet --- .../eval/mt_bench_branch_generator.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/instructlab/eval/mt_bench_branch_generator.py b/src/instructlab/eval/mt_bench_branch_generator.py index bd22294..511a227 100644 --- a/src/instructlab/eval/mt_bench_branch_generator.py +++ b/src/instructlab/eval/mt_bench_branch_generator.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # Standard +from pathlib import Path import hashlib import json import os @@ -22,10 +23,20 @@ def get_file_paths(directory): logger.debug(locals()) file_paths = [] - for root, _, files in os.walk(directory): - for file in files: - if file.split("/")[-1] == "qna.yaml": - file_paths.append(os.path.join(root, file)) + root_paths = [ + entry + for entry in Path(directory).iterdir() + if entry.is_dir() + if not entry.name.startswith(".") + if entry.name != "knowledge" + if entry.name != "docs" + if entry.name != "scripts" + ] + for basedir in root_paths: + for root, _, files in os.walk(basedir): + file_paths.extend( + [os.path.join(root, file) for file in files if file == "qna.yaml"] + ) return file_paths