diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml new file mode 100644 index 0000000..05d8679 --- /dev/null +++ b/.github/workflows/pipeline.yaml @@ -0,0 +1,19 @@ +name: Test pipeline + +on: + push: + branches: + - main + pull_request: + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + with: + python-version: '3.10' + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --all-files --show-diff-on-failure diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c2d8a37 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,44 @@ +ci: + autoupdate_branch: "main" + autoupdate_schedule: monthly +repos: + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 'v0.0.264' + hooks: + - id: ruff + files: | + (?x)^( + src/.*| + )$ + args: [ + "--target-version=py38", + "--fix", + "--exit-non-zero-on-fix", + ] + + - repo: https://github.com/PyCQA/bandit + rev: 1.7.4 + hooks: + - id: bandit + name: bandit + args: [ + "-r", + "./src/*" + ] + + - repo: https://github.com/psf/black + rev: 23.1.0 + hooks: + - id: black + name: black + files: | + (?x)^( + src/.*| + )$ + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.2.0 + hooks: + - id: mypy + args: ["--ignore-missing-imports", "src"] + pass_filenames: false \ No newline at end of file diff --git a/components/generate_prompts/Dockerfile b/src/components/generate_prompts/Dockerfile similarity index 100% rename from components/generate_prompts/Dockerfile rename to src/components/generate_prompts/Dockerfile diff --git a/components/generate_prompts/README.md b/src/components/generate_prompts/README.md similarity index 100% rename from components/generate_prompts/README.md rename to src/components/generate_prompts/README.md diff --git a/components/generate_prompts/fondant_component.yaml b/src/components/generate_prompts/fondant_component.yaml similarity index 100% rename from components/generate_prompts/fondant_component.yaml rename to src/components/generate_prompts/fondant_component.yaml diff --git a/components/generate_prompts/requirements.txt b/src/components/generate_prompts/requirements.txt similarity index 100% rename from components/generate_prompts/requirements.txt rename to src/components/generate_prompts/requirements.txt diff --git a/components/generate_prompts/src/main.py b/src/components/generate_prompts/src/main.py similarity index 91% rename from components/generate_prompts/src/main.py rename to src/components/generate_prompts/src/main.py index 9d58a28..0c9cc1c 100644 --- a/components/generate_prompts/src/main.py +++ b/src/components/generate_prompts/src/main.py @@ -1,5 +1,6 @@ """ -This component generates a set of initial prompts that will be used to retrieve images from the LAION-5B dataset. +This component generates a set of initial prompts that will be used to retrieve images +from the LAION-5B dataset. """ import itertools import logging @@ -9,7 +10,6 @@ import pandas as pd from fondant.component import DaskLoadComponent -from fondant.executor import DaskLoadExecutor logger = logging.getLogger(__name__) @@ -99,12 +99,12 @@ def make_interior_prompt(room: str, prefix: str, style: str) -> str: class GeneratePromptsComponent(DaskLoadComponent): def __init__(self, *args, n_rows_to_load: t.Optional[int]) -> None: """ - Generate a set of initial prompts that will be used to retrieve images from the LAION-5B - dataset. + Generate a set of initial prompts that will be used to retrieve images from the + LAION-5B dataset. Args: - n_rows_to_load: Optional argument that defines the number of rows to load. Useful for - testing pipeline runs on a small scale + n_rows_to_load: Optional argument that defines the number of rows to load. + Useful for testing pipeline runs on a small scale """ self.n_rows_to_load = n_rows_to_load diff --git a/components/write_to_hub_controlnet/fondant_component.yaml b/src/components/write_to_hub_controlnet/fondant_component.yaml similarity index 100% rename from components/write_to_hub_controlnet/fondant_component.yaml rename to src/components/write_to_hub_controlnet/fondant_component.yaml diff --git a/notebook.ipynb b/src/notebook.ipynb similarity index 100% rename from notebook.ipynb rename to src/notebook.ipynb diff --git a/pipeline.py b/src/pipeline.py similarity index 96% rename from pipeline.py rename to src/pipeline.py index c2339a2..026d543 100644 --- a/pipeline.py +++ b/src/pipeline.py @@ -14,13 +14,13 @@ N_ROWS_TO_LOAD = 10 # Set to None to load all rows # Create data directory if it doesn't exist and if it's a local path -if fsspec.core.url_to_fs(BASE_PATH)[0].protocol == ('file', 'local'): +if fsspec.core.url_to_fs(BASE_PATH)[0].protocol == ("file", "local"): Path(BASE_PATH).mkdir(parents=True, exist_ok=True) pipeline = Pipeline( pipeline_name="controlnet-pipeline", pipeline_description="Pipeline that collects data to train ControlNet", - base_path=BASE_PATH + base_path=BASE_PATH, ) # Define component ops