diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml new file mode 100644 index 0000000..d1f9f9a --- /dev/null +++ b/.github/workflows/python-lint.yml @@ -0,0 +1,39 @@ +name: Python Linting + +on: + push: + branches: [ "main" ] + paths: + - 'py-denormalized/**' + pull_request: + branches: [ "main" ] + paths: + - 'py-denormalized/**' + workflow_dispatch: + +jobs: + lint: + runs-on: ubuntu-latest + defaults: + run: + working-directory: py-denormalized # Set the working directory for all run steps + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: 'pip' + + - name: Install Ruff + run: pip install ruff + + - name: Run Ruff + # Ruff will look for pyproject.toml in the py-denormalized directory + run: | + # Run Ruff linter + ruff check . + # Run Ruff formatter + ruff format . --check diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 898cba6..f59a614 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -9,10 +9,12 @@ on: push: branches: - main - - master tags: - "*" pull_request: + branches: [ "main" ] + paths: + - 'py-denormalized/**' # Only trigger on changes in this directory workflow_dispatch: permissions: diff --git a/py-denormalized/pyproject.toml b/py-denormalized/pyproject.toml index 5d17649..c7adff2 100644 --- a/py-denormalized/pyproject.toml +++ b/py-denormalized/pyproject.toml @@ -33,8 +33,8 @@ dev-dependencies = [ # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W"] -ignore = ["D103"] +# select = ["E4", "E7", "E9", "F", "D", "W"] +ignore = ["F", "D100", "D101", "D102", "D103", "D107"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/py-denormalized/python/denormalized/feast_data_stream.py b/py-denormalized/python/denormalized/feast_data_stream.py index e289b8c..c4124d4 100644 --- a/py-denormalized/python/denormalized/feast_data_stream.py +++ b/py-denormalized/python/denormalized/feast_data_stream.py @@ -3,7 +3,6 @@ import pyarrow as pa from denormalized._d_internal import PyDataStream -from denormalized.datafusion import Expr from feast import FeatureStore, Field from feast.data_source import PushMode from feast.type_map import pa_to_feast_value_type diff --git a/py-denormalized/python/denormalized/utils.py b/py-denormalized/python/denormalized/utils.py index adb8e10..fb1aeb5 100644 --- a/py-denormalized/python/denormalized/utils.py +++ b/py-denormalized/python/denormalized/utils.py @@ -6,8 +6,7 @@ def to_internal_expr(expr: Expr | str) -> internal_exprs: """Convert a single Expr or string to internal exprs.""" return Expr.column(expr).expr if isinstance(expr, str) else expr.expr + def to_internal_exprs(exprs: list[Expr] | list[str]) -> list[internal_exprs]: """Convert a list of Expr or string to a list of internal exprs.""" - return [ - to_internal_expr(arg) for arg in exprs - ] + return [to_internal_expr(arg) for arg in exprs] diff --git a/py-denormalized/python/examples/udaf_example.py b/py-denormalized/python/examples/udaf_example.py index 724fdf0..82e6d50 100644 --- a/py-denormalized/python/examples/udaf_example.py +++ b/py-denormalized/python/examples/udaf_example.py @@ -9,7 +9,6 @@ from denormalized import Context from denormalized.datafusion import Accumulator, col -from denormalized.datafusion import functions as f from denormalized.datafusion import udaf @@ -27,6 +26,7 @@ def signal_handler(sig, frame): "reading": 0.0, } + class TotalValuesRead(Accumulator): # Define the state type as a struct containing a map acc_state_type = pa.struct([("counts", pa.map_(pa.string(), pa.int64()))]) @@ -45,14 +45,18 @@ def merge(self, states: pa.Array) -> None: return for state in states: if state is not None: - counts_map = state.to_pylist()[0] # will always be one element struct + counts_map = state.to_pylist()[0] # will always be one element struct for k, v in counts_map["counts"]: self.counts[k] += v def state(self) -> List[pa.Scalar]: # Convert current state to Arrow array format result = {"counts": dict(self.counts.items())} - return [pa.scalar(result, type=pa.struct([("counts", pa.map_(pa.string(), pa.int64()))]))] + return [ + pa.scalar( + result, type=pa.struct([("counts", pa.map_(pa.string(), pa.int64()))]) + ) + ] def evaluate(self) -> pa.Scalar: return self.state()[0] @@ -69,8 +73,11 @@ def print_batch(rb: pa.RecordBatch): return print(rb) + ctx = Context() -ds = ctx.from_topic("temperature", json.dumps(sample_event), bootstrap_server, "occurred_at_ms") +ds = ctx.from_topic( + "temperature", json.dumps(sample_event), bootstrap_server, "occurred_at_ms" +) ds = ds.window( [], diff --git a/py-denormalized/python/examples/udf_example.py b/py-denormalized/python/examples/udf_example.py index a198705..625a789 100644 --- a/py-denormalized/python/examples/udf_example.py +++ b/py-denormalized/python/examples/udf_example.py @@ -64,6 +64,4 @@ def print_batch(rb: pa.RecordBatch): col("count"), lit(1400.0), ), -).sink( - print_batch -) +).sink(print_batch)