From 5c770df4a3a73135a7336d5f4d4cfa1595c96bc4 Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Tue, 14 Jan 2025 14:28:42 -0800 Subject: [PATCH] feat(python): New PyVelox bindings for Types (#12040) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/12040 Adding new PyVelox bindings for Types. The purpose is not to provide the full functionality of the underlying C++ structures, but to provide the minimal API required to enable Python users to execute query plans. The subsequent PRs/diffs will contain the remaining binding libraries. Reviewed By: kgpai Differential Revision: D67957151 fbshipit-source-id: b3749248469e85b54fe46edb12c3b79447dd4981 --- velox/CMakeLists.txt | 4 ++ velox/py/CMakeLists.txt | 22 ++++++++ velox/py/tests/test_type.py | 86 +++++++++++++++++++++++++++++ velox/py/type/PyType.cpp | 24 ++++++++ velox/py/type/PyType.h | 107 ++++++++++++++++++++++++++++++++++++ velox/py/type/type.cpp | 56 +++++++++++++++++++ velox/py/type/type.pyi | 35 ++++++++++++ 7 files changed, 334 insertions(+) create mode 100644 velox/py/CMakeLists.txt create mode 100644 velox/py/tests/test_type.py create mode 100644 velox/py/type/PyType.cpp create mode 100644 velox/py/type/PyType.h create mode 100644 velox/py/type/type.cpp create mode 100644 velox/py/type/type.pyi diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index f904135355fa..b51e2a77073c 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -74,6 +74,10 @@ if(${VELOX_ENABLE_GPU}) add_subdirectory(external/jitify) endif() +if(${VELOX_BUILD_PYTHON_PACKAGE}) + add_subdirectory(py) +endif() + # substrait converter if(${VELOX_ENABLE_SUBSTRAIT}) add_subdirectory(substrait) diff --git a/velox/py/CMakeLists.txt b/velox/py/CMakeLists.txt new file mode 100644 index 000000000000..f710c0059f4d --- /dev/null +++ b/velox/py/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# velox.py.type library: +velox_add_library(velox_py_type_lib type/PyType.cpp) +velox_link_libraries(velox_py_type_lib velox_type pybind11::module) + +pybind11_add_module(type MODULE type/type.cpp) +target_link_libraries( + type + PRIVATE velox_py_type_lib) diff --git a/velox/py/tests/test_type.py b/velox/py/tests/test_type.py new file mode 100644 index 000000000000..c19909277a86 --- /dev/null +++ b/velox/py/tests/test_type.py @@ -0,0 +1,86 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from velox.py.type import ( + Type, + BIGINT, + INTEGER, + SMALLINT, + TINYINT, + BOOLEAN, + REAL, + DOUBLE, + VARCHAR, + VARBINARY, + ARRAY, + MAP, + ROW, +) + + +class TestPyVeloxTypes(unittest.TestCase): + def test_simple_types(self): + self.assertTrue(isinstance(BIGINT(), Type)) + self.assertTrue(isinstance(INTEGER(), Type)) + self.assertTrue(isinstance(SMALLINT(), Type)) + self.assertTrue(isinstance(TINYINT(), Type)) + self.assertTrue(isinstance(BOOLEAN(), Type)) + self.assertTrue(isinstance(REAL(), Type)) + self.assertTrue(isinstance(DOUBLE(), Type)) + self.assertTrue(isinstance(VARCHAR(), Type)) + self.assertTrue(isinstance(VARBINARY(), Type)) + + def test_complex_types(self): + self.assertTrue(isinstance(ARRAY(VARCHAR()), Type)) + self.assertTrue(isinstance(MAP(BIGINT(), VARCHAR()), Type)) + self.assertTrue( + isinstance(ROW(["c0", "c1"], [INTEGER(), ARRAY(VARCHAR())]), Type) + ) + self.assertTrue(isinstance(ROW(), Type)) + + # Invalid complex types. + self.assertRaises(TypeError, ARRAY) + self.assertRaises(TypeError, MAP) + self.assertRaises(TypeError, MAP, BIGINT()) + self.assertRaises(RuntimeError, ROW, ["col1"]) + + def test_to_str(self): + self.assertEqual("BOOLEAN", str(BOOLEAN())) + self.assertEqual("VARBINARY", str(VARBINARY())) + + self.assertEqual("ARRAY", str(ARRAY(DOUBLE()))) + self.assertEqual("MAP", str(MAP(VARCHAR(), DOUBLE()))) + self.assertEqual( + "ROW", str(ROW(["c0", "c1"], [INTEGER(), TINYINT()])) + ) + velox_type = ROW( + ["c0", "c1"], + [MAP(MAP(ARRAY(ARRAY(REAL())), DOUBLE()), BOOLEAN()), TINYINT()], + ) + self.assertEqual( + "ROW>,DOUBLE>,BOOLEAN>,c1:TINYINT>", + str(velox_type), + ) + + def test_equality(self): + self.assertEqual(INTEGER(), INTEGER()) + self.assertEqual( + MAP(MAP(TINYINT(), BOOLEAN()), BOOLEAN()), + MAP(MAP(TINYINT(), BOOLEAN()), BOOLEAN()), + ) + self.assertEqual(ROW(["a"], [INTEGER()]), ROW(["a"], [INTEGER()])) + + self.assertNotEqual(BIGINT(), INTEGER()) + self.assertNotEqual(ARRAY(BIGINT()), REAL()) diff --git a/velox/py/type/PyType.cpp b/velox/py/type/PyType.cpp new file mode 100644 index 000000000000..919aeaf2f440 --- /dev/null +++ b/velox/py/type/PyType.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/py/type/PyType.h" +#include + +namespace facebook::velox::py { + +namespace py = pybind11; + +} // namespace facebook::velox::py diff --git a/velox/py/type/PyType.h b/velox/py/type/PyType.h new file mode 100644 index 000000000000..d6744a3e454a --- /dev/null +++ b/velox/py/type/PyType.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include "velox/type/Type.h" + +namespace facebook::velox::py { + +class PyType { + public: + explicit PyType(const TypePtr& type = nullptr) : type_(type) {} + + std::string toString() const { + if (!type_) { + return "[nullptr]"; + } + return type_->toString(); + } + + TypePtr type() const { + return type_; + } + + bool equivalent(const PyType& other) const { + return type_->equivalent(*other.type()); + } + + // Factory functions: + + static PyType createRowType( + const std::vector& names, + const std::vector& pyTypes) { + std::vector types; + for (const auto& pyType : pyTypes) { + types.emplace_back(pyType.type()); + } + + if (names.empty()) { + return PyType{ROW(std::move(types))}; + } + return PyType{ROW(folly::copy(names), std::move(types))}; + } + + static PyType createMapType(const PyType& keyType, const PyType& valueType) { + return PyType{MAP(keyType.type(), valueType.type())}; + } + + static PyType createArrayType(const PyType& elementsType) { + return PyType{ARRAY(elementsType.type())}; + } + + static PyType createBigint() { + return PyType{BIGINT()}; + } + + static PyType createInteger() { + return PyType{INTEGER()}; + } + + static PyType createSmallint() { + return PyType{SMALLINT()}; + } + + static PyType createTinyint() { + return PyType{TINYINT()}; + } + + static PyType createBoolean() { + return PyType{BOOLEAN()}; + } + + static PyType createReal() { + return PyType{REAL()}; + } + + static PyType createDouble() { + return PyType{DOUBLE()}; + } + + static PyType createVarchar() { + return PyType{VARCHAR()}; + } + + static PyType createVarbinary() { + return PyType{VARBINARY()}; + } + + private: + TypePtr type_; +}; + +} // namespace facebook::velox::py diff --git a/velox/py/type/type.cpp b/velox/py/type/type.cpp new file mode 100644 index 000000000000..e6519785822a --- /dev/null +++ b/velox/py/type/type.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "velox/py/type/PyType.h" + +namespace py = pybind11; + +PYBIND11_MODULE(type, m) { + using namespace facebook; + + py::class_(m, "Type") + .def("__str__", &velox::py::PyType::toString) + .def("__eq__", [](velox::py::PyType& u, velox::py::PyType& v) { + return u.equivalent(v); + }); + + m.def("BIGINT", &velox::py::PyType::createBigint); + m.def("INTEGER", &velox::py::PyType::createInteger); + m.def("SMALLINT", &velox::py::PyType::createSmallint); + m.def("TINYINT", &velox::py::PyType::createTinyint); + m.def("BOOLEAN", &velox::py::PyType::createBoolean); + + m.def("REAL", &velox::py::PyType::createReal); + m.def("DOUBLE", &velox::py::PyType::createDouble); + + m.def("VARCHAR", &velox::py::PyType::createVarchar); + m.def("VARBINARY", &velox::py::PyType::createVarbinary); + + m.def("ARRAY", &velox::py::PyType::createArrayType, py::arg("elements_type")); + m.def( + "MAP", + &velox::py::PyType::createMapType, + py::arg("key_type"), + py::arg("value_type")); + m.def( + "ROW", + &velox::py::PyType::createRowType, + py::arg("names") = std::vector{}, + py::arg("types") = std::vector{}); +} diff --git a/velox/py/type/type.pyi b/velox/py/type/type.pyi new file mode 100644 index 000000000000..d12afc8e6d26 --- /dev/null +++ b/velox/py/type/type.pyi @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pyre-unsafe + +from typing import List + + +class Type: ... + +def BIGINT() -> Type: ... +def INTEGER() -> Type: ... +def SMALLINT() -> Type: ... +def TINYINT() -> Type: ... +def BOOLEAN() -> Type: ... +def REAL() -> Type: ... +def DOUBLE() -> Type: ... +def VARCHAR() -> Type: ... +def VARBINARY() -> Type: ... +def ARRAY(elements_type: Type) -> Type: ... +def MAP(key_type: Type, value_type: Type) -> Type: ... +def ROW(names: List[str] = [], types: List[Type] = []) -> Type: ...