From bab135c9b62d823b144464ec8dbecdcb1024038f Mon Sep 17 00:00:00 2001 From: Aakanksha Duggal Date: Wed, 8 Jan 2025 10:38:11 -0500 Subject: [PATCH] Update HTML error to warning to avoid exiting Signed-off-by: Aakanksha Duggal --- src/instructlab/sdg/utils/taxonomy.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/instructlab/sdg/utils/taxonomy.py b/src/instructlab/sdg/utils/taxonomy.py index d582c5f7..c0d4dc1f 100644 --- a/src/instructlab/sdg/utils/taxonomy.py +++ b/src/instructlab/sdg/utils/taxonomy.py @@ -173,12 +173,10 @@ def _get_documents( with open(file_path, "r", encoding="utf-8") as file: content = file.read() if _string_contains_html(content): - raise ValueError( - f"Provided markdown file {file_path} contains" - " HTML, which is currently unsupported. Please" - " format your markdown documents without the" - " use of HTML or use a different document" - " filetype." + logging.warning( + f"Provided markdown file {file_path} contains HTML contents, which is currently unsupported as a part of markdown." + " NOTE: Continuing this might affect your data generation quality." + " To get best results please format your markdown documents without the use of HTML or use a different document filetype." ) file_contents.append(content) filepaths.append(Path(file_path))