Skip to content

Commit

Permalink
remove __name__ from logging.getLogger() calls to use root logger
Browse files: browse the repository at this point in the history
Signed-off-by: Khaled Sulayman <[email protected]>
  • Loading branch information
khaledsulayman committed Dec 11, 2024
1 parent 81fad3c commit 243b5a1
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 23 deletions.
7 changes: 2 additions & 5 deletions src/instructlab/sdg/datamixing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# when |knowledge| << |skills|
MIN_UPSAMPLE_THRESHOLD = 0.03
ALLOWED_COLS = ["id", "messages", "metadata"]
- LOGGER = logging.getLogger(__name__)
+ LOGGER = logging.getLogger()


class DatasetListing(TypedDict):
Expand Down Expand Up @@ -739,10 +739,7 @@ def _gen_mixed_data(self, recipe, output_file_recipe, output_file_data):
self.num_procs,
)

- def generate(self, logger=None):
- if logger is not None:
- global LOGGER # pylint: disable=global-statement
- LOGGER = logger
+ def generate(self):
self._gen_mixed_data(
self.knowledge_recipe,
self.output_file_knowledge_recipe,
Expand Down
5 changes: 2 additions & 3 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def generate_data(
document_output_dir = Path(output_dir) / f"documents-{date_suffix}"

leaf_nodes = read_taxonomy_leaf_nodes(
- taxonomy, taxonomy_base, yaml_rules, document_output_dir, logger=LOGGER
+ taxonomy, taxonomy_base, yaml_rules, document_output_dir
)
if not leaf_nodes:
raise GenerateException("Error: No new leaf nodes found in the taxonomy.")
Expand Down Expand Up @@ -406,7 +406,6 @@ def generate_data(
document_output_dir,
model_name,
docling_model_path=docling_model_path,
- logger=LOGGER,
)

if not samples:
Expand Down Expand Up @@ -458,7 +457,7 @@ def generate_data(
system_prompt,
)

- mixer.generate(logger=LOGGER)
+ mixer.generate()

generate_duration = time.time() - generate_start
LOGGER.info(f"Generation took {generate_duration:.2f}s")
Expand Down
8 changes: 2 additions & 6 deletions src/instructlab/sdg/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from .blocks.block import Block
from .registry import BlockRegistry

- LOGGER = logging.getLogger(__name__)
+ LOGGER = logging.getLogger()


# This is part of the public API.
Expand Down Expand Up @@ -134,16 +134,12 @@ def from_file(cls, ctx, pipeline_yaml):
pipeline_yaml = os.path.join(resources.files(__package__), pipeline_yaml)
return cls(ctx, pipeline_yaml, *_parse_pipeline_config_file(pipeline_yaml))

- def generate(self, dataset, checkpoint_name=None, logger=None) -> Dataset:
+ def generate(self, dataset, checkpoint_name=None) -> Dataset:
"""
Generate the dataset by running the pipeline steps.
dataset: the input dataset
checkpoint_name: unique subdir name for the checkpoint within checkpoint_dir
"""

- if logger is not None:
- global LOGGER # pylint: disable=global-statement
- LOGGER = logger
# The checkpointer allows us to resume from where we left off
# Saving the output of pipe instances along the way
checkpoint_dir = None
Expand Down
11 changes: 2 additions & 9 deletions src/instructlab/sdg/utils/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# Initialize the pdf parser
PDFParser = pdf_parser_v1()

- LOGGER = logging.getLogger(__name__)
+ LOGGER = logging.getLogger()


def _is_taxonomy_file(fn: str) -> bool:
Expand Down Expand Up @@ -372,11 +372,8 @@ def read_taxonomy(


def read_taxonomy_leaf_nodes(
- taxonomy, taxonomy_base, yaml_rules, document_output_dir=None, logger=None
+ taxonomy, taxonomy_base, yaml_rules, document_output_dir=None
):
- if logger is not None:
- global LOGGER # pylint: disable=global-statement
- LOGGER = logger
seed_instruction_data = read_taxonomy(
taxonomy, taxonomy_base, yaml_rules, document_output_dir
)
Expand Down Expand Up @@ -466,11 +463,7 @@ def leaf_node_to_samples(
document_output_dir,
model_name,
docling_model_path=None,
- logger=None,
):
- if logger is not None:
- global LOGGER # pylint: disable=global-statement
- LOGGER = logger
if not leaf_node:
return []
if leaf_node[0].get("documents"):
Expand Down

0 comments on commit 243b5a1

Please sign in to comment.