# pipeline.py
"""Pipeline used to create the dataset to train the StarCoder model."""
import logging
from fondant.logger import configure_logging
from fondant.pipeline import ComponentOp, Pipeline, Client
from pipeline_config import PipelineConfig
# Set up logging via fondant's helper, then grab a module-level logger.
configure_logging()
logger = logging.getLogger(__name__)

# Human-readable metadata attached to the pipeline.
team_name = "cheesecake"  # TODO: insert your team name
pipeline_name = f"Stack filtering pipeline {team_name}"
pipeline_description = "A pipeline for filtering the stack dataset"

# Build the pipeline object and the client used to submit it; the base path
# and the host are both read from the local PipelineConfig.
pipeline = Pipeline(
    pipeline_name=pipeline_name,
    pipeline_description=pipeline_description,
    base_path=PipelineConfig.BASE_PATH,
)
client = Client(host=PipelineConfig.HOST)
# Loading step: the load_from_hub_stack component, pointed at the
# "ml6team/the-stack-smol-python" dataset.
load_from_hub_op = ComponentOp(
    component_spec_path="components/load_from_hub_stack/fondant_component.yaml",
    arguments={"dataset_name": "ml6team/the-stack-smol-python"},
)

# TODO: add your component here
# your_custom_component_op = ComponentOp(
#     component_spec_path="components/your_custom_component/fondant_component.yaml",
#     arguments={},  # TODO: insert your component's arguments here
# )

# PII redaction step; it is created from its component spec alone, with no
# explicit arguments.
pii_redaction_op = ComponentOp(
    component_spec_path="components/pii_redaction/fondant_component.yaml",
)
# Register the operations on the pipeline. The loading op is added first and
# without dependencies.
pipeline.add_op(load_from_hub_op)
# TODO: Add your component op to the pipeline
# pipeline.add_op(your_custom_component_op, dependencies=load_from_hub_op)
# TODO: Update the dependencies to include your component
# As written, PII redaction depends directly on the loading op.
pipeline.add_op(pii_redaction_op, dependencies=load_from_hub_op)
# Hand the assembled pipeline to the client, which compiles and runs it
# (presumably against PipelineConfig.HOST -- confirm against the fondant Client docs).
client.compile_and_run(pipeline=pipeline)