Skip to content

Commit

Permalink
More corrections to the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
metric-space committed Nov 22, 2023
1 parent 2f91c25 commit 3b8e270
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions dalm/pipelines/reading_comprehension_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,6 +104,9 @@ def pipeline(
generation_state = {"processed_files": [], "total_files": 0, "files_missed": 0}
pickle.dump(generation_state, open(generation_state_file, "wb"))

if not os.path.exists(llm_dataset_output_path):
os.makedirs(llm_dataset_output_path)

# NOTE: this operation is time-consuming and very expensive.
# Care has been taken to save intermediate steps so that, in case of failure,
# the generation can be resumed from the last checkpoint.
Expand Down Expand Up @@ -178,6 +181,7 @@ def pipeline(
log_freq=log_freq,
neftune_noise_alpha=neftune_noise_alpha,
log_with=log_with,
local_dataset=True,
)


Expand Down

0 comments on commit 3b8e270

Please sign in to comment.