From eeaba406959476f1abbbb2159596a6852e9c8a66 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 9 May 2024 21:15:04 +0800 Subject: [PATCH 01/15] feat(query): add py script --- Cargo.lock | 594 +++++++++++++----- Cargo.toml | 3 +- src/bendpy/pyproject.toml | 2 +- src/query/service/Cargo.toml | 1 + .../transforms/transform_udf_script.rs | 26 + src/query/sql/src/planner/binder/udf.rs | 5 +- .../suites/udf_native/03_0001_udf_js.test | 2 +- .../suites/udf_native/03_0001_udf_py.test | 15 + 8 files changed, 484 insertions(+), 164 deletions(-) create mode 100644 tests/sqllogictests/suites/udf_native/03_0001_udf_py.test diff --git a/Cargo.lock b/Cargo.lock index 6aaca87f56df..e20aab41a626 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -323,7 +323,6 @@ dependencies = [ "arrow-schema 50.0.0", "arrow-select 50.0.0", "arrow-string 50.0.0", - "pyo3", ] [[package]] @@ -388,6 +387,22 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +dependencies = [ + "ahash 0.8.10", + "arrow-buffer 51.0.0", + "arrow-data 51.0.0", + "arrow-schema 51.0.0", + "chrono", + "half", + "hashbrown 0.14.3", + "num", +] + [[package]] name = "arrow-buffer" version = "47.0.0" @@ -410,6 +425,17 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "47.0.0" @@ -446,6 +472,26 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-cast" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +dependencies = [ + "arrow-array 51.0.0", + 
"arrow-buffer 51.0.0", + "arrow-data 51.0.0", + "arrow-schema 51.0.0", + "arrow-select 51.0.0", + "atoi", + "base64 0.22.0", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + [[package]] name = "arrow-csv" version = "47.0.0" @@ -508,6 +554,18 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-data" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +dependencies = [ + "arrow-buffer 51.0.0", + "arrow-schema 51.0.0", + "half", + "num", +] + [[package]] name = "arrow-flight" version = "50.0.0" @@ -575,6 +633,20 @@ dependencies = [ "flatbuffers", ] +[[package]] +name = "arrow-ipc" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +dependencies = [ + "arrow-array 51.0.0", + "arrow-buffer 51.0.0", + "arrow-cast 51.0.0", + "arrow-data 51.0.0", + "arrow-schema 51.0.0", + "flatbuffers", +] + [[package]] name = "arrow-json" version = "47.0.0" @@ -687,10 +759,15 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" dependencies = [ - "bitflags 2.4.2", "serde", ] +[[package]] +name = "arrow-schema" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" + [[package]] name = "arrow-select" version = "47.0.0" @@ -719,6 +796,20 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-select" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +dependencies = [ + "ahash 0.8.10", + "arrow-array 51.0.0", + "arrow-buffer 51.0.0", + "arrow-data 51.0.0", + "arrow-schema 51.0.0", + "num", +] + [[package]] name = 
"arrow-string" version = "47.0.0" @@ -757,21 +848,36 @@ version = "0.2.0" source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-buffer 50.0.0", - "arrow-schema 50.0.0", + "arrow-array 51.0.0", + "arrow-buffer 51.0.0", + "arrow-schema 51.0.0", "rquickjs", ] +[[package]] +name = "arrow-udf-python" +version = "0.1.0" +source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" +dependencies = [ + "anyhow", + "arrow-array 51.0.0", + "arrow-buffer 51.0.0", + "arrow-ipc 51.0.0", + "arrow-schema 51.0.0", + "lazy_static", + "pyo3", + "pyo3-build-config", +] + [[package]] name = "arrow-udf-wasm" version = "0.2.2" source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-ipc 50.0.0", - "arrow-schema 50.0.0", + "arrow-array 51.0.0", + "arrow-ipc 51.0.0", + "arrow-schema 51.0.0", "async-trait", "base64 0.22.0", "genawaiter", @@ -1058,6 +1164,15 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-polyfill" version = "1.0.3" @@ -1107,9 +1222,9 @@ dependencies = [ "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "hyper", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", "itoa", "matchit", "memchr", @@ -1133,8 +1248,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.11", + "http-body 0.4.6", "mime", "rustversion", "tower-layer", @@ -1157,9 +1272,9 @@ dependencies = [ [[package]] name = "backon" -version = "0.4.2" +version = "0.4.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c79c8ef183b8b663e8cb19cf92fb7d98c56739977bd47eae2de2717bd5de2c2c" +checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" dependencies = [ "fastrand 2.0.1", "futures-core", @@ -1232,28 +1347,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" -[[package]] -name = "bendpy" -version = "0.0.0" -dependencies = [ - "arrow 50.0.0", - "arrow-schema 50.0.0", - "ctor 0.2.7", - "databend-common-config", - "databend-common-exception", - "databend-common-expression", - "databend-common-license", - "databend-common-meta-app", - "databend-common-meta-embedded", - "databend-common-users", - "databend-query", - "pyo3", - "pyo3-build-config 0.18.3", - "tokio", - "tokio-stream", - "uuid", -] - [[package]] name = "bigdecimal" version = "0.4.2" @@ -1576,9 +1669,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" dependencies = [ "serde", ] @@ -2490,6 +2583,15 @@ dependencies = [ "wasmtime-types", ] +[[package]] +name = "crc32c" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89254598aa9b9fa608de44b3ae54c810f0f06d755e24c50177f1f8f31ff50ce2" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.4.0" @@ -2698,16 +2800,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "ctor" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" -dependencies = [ - "quote", - "syn 2.0.52", -] 
- [[package]] name = "ctrlc" version = "3.4.2" @@ -2847,7 +2939,7 @@ dependencies = [ "limits-rs", "log", "minitrace", - "opendal", + "opendal 0.45.1", "poem", "sentry", "serde", @@ -2864,10 +2956,10 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5af35e29bef4d4f89028a6b1251c5674df66a77f70cb6996c6e48bdfe5407c59" dependencies = [ - "http", + "http 0.2.11", "once_cell", "percent-encoding", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "tokio", @@ -2992,7 +3084,7 @@ dependencies = [ "base64 0.21.7", "chrono", "databend-common-base", - "http", + "http 0.2.11", "tempfile", ] @@ -3183,8 +3275,8 @@ dependencies = [ "databend-common-arrow", "geos", "geozero", - "http", - "opendal", + "http 0.2.11", + "opendal 0.45.1", "parquet", "paste", "prost 0.12.3", @@ -3299,7 +3391,7 @@ dependencies = [ "comfy-table 6.2.0", "crc32fast", "criterion", - "ctor 0.1.26", + "ctor", "databend-common-arrow", "databend-common-ast", "databend-common-base", @@ -3352,7 +3444,7 @@ dependencies = [ "databend-common-base", "databend-common-exception", "hickory-resolver", - "hyper", + "hyper 0.14.28", "jwt-simple", "log", "serde", @@ -3508,7 +3600,7 @@ dependencies = [ "maplit", "num-derive", "num-traits", - "opendal", + "opendal 0.45.1", "paste", "serde", "serde_json", @@ -3823,7 +3915,7 @@ dependencies = [ "futures-util", "log", "minitrace", - "opendal", + "opendal 0.45.1", "parking_lot 0.12.1", "serde", "serde_json", @@ -3938,11 +4030,11 @@ dependencies = [ "databend-common-meta-app", "databend-common-storage", "databend-common-users", - "http", + "http 0.2.11", "log", "moka", - "opendal", - "reqwest", + "opendal 0.45.1", + "reqwest 0.11.24", "serde", "serde_json", ] @@ -3960,7 +4052,7 @@ dependencies = [ "chrono-tz", "cidr", "cron", - "ctor 0.1.26", + "ctor", "dashmap", "databend-common-ast", "databend-common-async-functions", @@ -4001,7 +4093,7 @@ dependencies = [ "minitrace", "num-derive", "num-traits", - "opendal", + "opendal 
0.45.1", "ordered-float 4.2.0", "parking_lot 0.12.1", "percent-encoding", @@ -4035,12 +4127,12 @@ dependencies = [ "futures", "log", "metrics", - "opendal", + "opendal 0.45.1", "ordered-float 4.2.0", "parquet", "prometheus-client", "regex", - "reqwest", + "reqwest 0.11.24", "reqwest-hickory-resolver", "serde", "thiserror", @@ -4070,7 +4162,7 @@ dependencies = [ "match-template", "minitrace", "object_store", - "opendal", + "opendal 0.45.1", "ordered-float 4.2.0", "parquet", "serde", @@ -4150,7 +4242,7 @@ dependencies = [ "log", "metrics", "minitrace", - "opendal", + "opendal 0.45.1", "parquet", "rand 0.8.5", "serde", @@ -4195,7 +4287,7 @@ dependencies = [ "hive_metastore", "log", "minitrace", - "opendal", + "opendal 0.45.1", "ordered-float 4.2.0", "serde", "typetag", @@ -4227,7 +4319,7 @@ dependencies = [ "icelake", "match-template", "minitrace", - "opendal", + "opendal 0.45.1", "parquet", "serde", "tokio", @@ -4310,7 +4402,7 @@ dependencies = [ "ethnum 1.5.0 (git+https://github.com/ariesdevil/ethnum-rs?rev=4cb05f1)", "futures", "log", - "opendal", + "opendal 0.45.1", "parquet", "rand 0.8.5", "serde", @@ -4358,7 +4450,7 @@ dependencies = [ "databend-common-storage", "databend-storages-common-blocks", "databend-storages-common-table-meta", - "opendal", + "opendal 0.45.1", "parquet", "serde", "serde_json", @@ -4378,7 +4470,7 @@ dependencies = [ "databend-storages-common-table-meta", "enumflags2", "goldenfile", - "opendal", + "opendal 0.45.1", "serde", "serde_json", "tempfile", @@ -4409,7 +4501,7 @@ dependencies = [ "databend-common-storages-parquet", "enum-as-inner 0.6.0", "log", - "opendal", + "opendal 0.45.1", "serde", "serde_json", "typetag", @@ -4471,7 +4563,7 @@ dependencies = [ "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=3fe3acd)", "log", "once_cell", - "opendal", + "opendal 0.45.1", "parking_lot 0.12.1", "regex", "serde", @@ -4542,7 +4634,7 @@ dependencies = [ "parking_lot 0.12.1", "passwords", "pretty_assertions", - "reqwest", + "reqwest 
0.11.24", "serde", "serde_json", "wiremock", @@ -4692,7 +4784,7 @@ dependencies = [ "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=3fe3acd)", "jwt-simple", "log", - "opendal", + "opendal 0.45.1", "tantivy", "tempfile", ] @@ -4794,7 +4886,7 @@ dependencies = [ "prometheus-client", "prost 0.12.3", "regex", - "reqwest", + "reqwest 0.11.24", "semver", "serde", "serde_json", @@ -4818,6 +4910,7 @@ dependencies = [ "arrow-ipc 50.0.0", "arrow-schema 50.0.0", "arrow-udf-js", + "arrow-udf-python", "arrow-udf-wasm", "async-backtrace", "async-channel 1.9.0", @@ -4832,7 +4925,7 @@ dependencies = [ "chrono-tz", "config", "criterion", - "ctor 0.1.26", + "ctor", "dashmap", "databend-common-arrow", "databend-common-ast", @@ -4909,7 +5002,7 @@ dependencies = [ "headers", "hex", "highway", - "http", + "http 0.2.11", "humantime", "indicatif", "itertools 0.10.5", @@ -4926,7 +5019,7 @@ dependencies = [ "num", "num_cpus", "once_cell", - "opendal", + "opendal 0.45.1", "opensrv-mysql", "opentelemetry", "opentelemetry-http", @@ -4943,10 +5036,10 @@ dependencies = [ "prost 0.12.3", "rand 0.8.5", "regex", - "reqwest", + "reqwest 0.11.24", "rmp-serde", "rustls 0.21.11", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "rustyline", "serde", "serde_json", @@ -4986,7 +5079,7 @@ dependencies = [ "databend-common-storage", "databend-common-storages-share", "enumflags2", - "opendal", + "opendal 0.45.1", "poem", "serde", "serde_json", @@ -5026,7 +5119,7 @@ dependencies = [ "mysql_async", "rand 0.8.5", "regex", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "sqllogictest", @@ -5146,7 +5239,7 @@ dependencies = [ "databend-common-exception", "log", "minitrace", - "opendal", + "opendal 0.45.1", ] [[package]] @@ -7887,7 +7980,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.11", "indexmap 2.2.5", "slab", "tokio", @@ -8011,7 +8104,7 @@ dependencies = [ "base64 0.21.7", "bytes", "headers-core", - "http", + "http 0.2.11", "httpdate", "mime", 
"sha1", @@ -8023,7 +8116,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" dependencies = [ - "http", + "http 0.2.11", ] [[package]] @@ -8226,6 +8319,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -8233,7 +8337,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.11", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +dependencies = [ + "bytes", + "futures-core", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite", ] @@ -8247,7 +8374,7 @@ dependencies = [ "async-channel 1.9.0", "base64 0.13.1", "futures-lite 1.13.0", - "http", + "http 0.2.11", "infer", "pin-project-lite", "rand 0.7.3", @@ -8293,8 +8420,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.11", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -8306,6 +8433,25 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +dependencies = [ 
+ "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -8313,11 +8459,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", - "hyper", + "http 0.2.11", + "hyper 0.14.28", "rustls 0.21.11", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.3.1", + "hyper-util", + "rustls 0.22.2", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.25.0", + "tower-service", ] [[package]] @@ -8326,12 +8489,32 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper", + "hyper 0.14.28", "pin-project-lite", "tokio", "tokio-io-timeout", ] +[[package]] +name = "hyper-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "hyper 1.3.1", + "pin-project-lite", + "socket2 0.5.6", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -8384,11 +8567,11 @@ dependencies = [ "log", "murmur3", "once_cell", - "opendal", + "opendal 0.46.0", "ordered-float 3.9.2", "parquet", "regex", - "reqwest", + "reqwest 0.11.24", "rust_decimal", "serde", "serde_bytes", @@ -8644,7 +8827,7 @@ dependencies = [ "socket2 0.5.6", 
"widestring", "windows-sys 0.48.0", - "winreg", + "winreg 0.50.0", ] [[package]] @@ -9587,7 +9770,7 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fb9b5c752f145ac5046bccc3c4f62892e3c950c1d1eab80c5949cd68a2078db" dependencies = [ - "ctor 0.1.26", + "ctor", "web-time", ] @@ -9686,7 +9869,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 0.2.11", "httparse", "log", "memchr", @@ -9761,13 +9944,13 @@ dependencies = [ "pin-project", "rand 0.8.5", "rustls 0.21.11", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "socket2 0.5.6", "thiserror", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "twox-hash", "url", @@ -10191,19 +10374,18 @@ dependencies = [ "futures", "getrandom 0.2.12", "hdrs", - "http", + "http 0.2.11", "log", "md-5", "minitrace", "moka", "once_cell", "percent-encoding", - "prometheus", "prometheus-client", "prost 0.11.9", "quick-xml 0.31.0", "reqsign", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "sha2", @@ -10211,6 +10393,36 @@ dependencies = [ "uuid", ] +[[package]] +name = "opendal" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "328c4992328e8965e6a6ef102d38438b5fdc7d9b9107eda2377ba05379d9d544" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64 0.22.0", + "bytes", + "chrono", + "crc32c", + "flagset", + "futures", + "getrandom 0.2.12", + "http 1.1.0", + "log", + "md-5", + "once_cell", + "percent-encoding", + "prometheus", + "quick-xml 0.31.0", + "reqwest 0.12.4", + "serde", + "serde_json", + "tokio", + "uuid", +] + [[package]] name = "openraft" version = "0.9.9" @@ -10259,7 +10471,7 @@ dependencies = [ "nom", "pin-project-lite", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", ] [[package]] @@ -10292,7 +10504,7 @@ checksum = "7f51189ce8be654f9b5f7e70e49967ed894e84a06fc35c6c042e64ac1fc5399e" dependencies = [ "async-trait", "bytes", - "http", + "http 
0.2.11", "opentelemetry", ] @@ -10304,7 +10516,7 @@ checksum = "f24cda83b20ed2433c68241f918d0f6fdec8b1d43b7a9590ab4420c5095ca930" dependencies = [ "async-trait", "futures-core", - "http", + "http 0.2.11", "opentelemetry", "opentelemetry-proto", "opentelemetry-semantic-conventions", @@ -10925,8 +11137,8 @@ dependencies = [ "bytes", "futures-util", "headers", - "http", - "hyper", + "http 0.2.11", + "hyper 0.14.28", "mime", "multer", "nix 0.27.1", @@ -10936,14 +11148,14 @@ dependencies = [ "poem-derive", "regex", "rfc7239", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", "smallvec", "thiserror", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "tracing", "wildmatch", @@ -11552,9 +11764,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "cfg-if", "indoc", @@ -11562,7 +11774,7 @@ dependencies = [ "memoffset", "parking_lot 0.12.1", "portable-atomic 1.6.0", - "pyo3-build-config 0.20.3", + "pyo3-build-config", "pyo3-ffi", "pyo3-macros", "unindent", @@ -11570,19 +11782,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-build-config" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" dependencies = [ "once_cell", "target-lexicon", @@ -11590,19 +11792,19 @@ dependencies = [ 
[[package]] name = "pyo3-ffi" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" dependencies = [ "libc", - "pyo3-build-config 0.20.3", + "pyo3-build-config", ] [[package]] name = "pyo3-macros" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -11612,13 +11814,13 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" dependencies = [ "heck 0.4.1", "proc-macro2", - "pyo3-build-config 0.20.3", + "pyo3-build-config", "quote", "syn 2.0.52", ] @@ -12013,14 +12215,14 @@ dependencies = [ "hex", "hmac", "home", - "http", + "http 0.2.11", "jsonwebtoken", "log", "once_cell", "percent-encoding", "quick-xml 0.31.0", "rand 0.8.5", - "reqwest", + "reqwest 0.11.24", "rsa 0.9.6", "rust-ini 0.20.0", "serde", @@ -12041,10 +12243,10 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -12055,14 +12257,14 @@ dependencies = [ "pin-project-lite", "rustls 0.21.11", "rustls-native-certs", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "system-configuration", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", 
"tower-service", "url", @@ -12071,7 +12273,50 @@ dependencies = [ "wasm-streams", "web-sys", "webpki-roots 0.25.4", - "winreg", + "winreg 0.50.0", +] + +[[package]] +name = "reqwest" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +dependencies = [ + "base64 0.22.0", + "bytes", + "futures-core", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.3.1", + "hyper-rustls 0.26.0", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls 0.22.2", + "rustls-pemfile 2.1.2", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.25.0", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots 0.26.1", + "winreg 0.52.0", ] [[package]] @@ -12081,9 +12326,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acc5825a66171aedfa62e562b26d906a42dce52f801aa425b8c8aff15edcda6e" dependencies = [ "hickory-resolver", - "hyper", + "hyper 0.14.28", "once_cell", - "reqwest", + "reqwest 0.11.24", ] [[package]] @@ -12477,7 +12722,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "schannel", "security-framework", ] @@ -12491,6 +12736,16 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.0", + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.3.1" @@ -12690,7 +12945,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "766448f12e44d68e675d5789a261515c46ac6ccd240abdd451a9c46c84a49523" dependencies = [ "httpdate", - "reqwest", + "reqwest 0.11.24", "rustls 0.21.11", "sentry-backtrace", "sentry-contexts", @@ -14193,6 +14448,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +dependencies = [ + "rustls 0.22.2", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.14" @@ -14322,9 +14588,9 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-timeout", "percent-encoding", "pin-project", @@ -14349,18 +14615,18 @@ dependencies = [ "base64 0.21.7", "bytes", "h2", - "http", - "http-body", - "hyper", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-timeout", "percent-encoding", "pin-project", "prost 0.12.3", "rustls 0.21.11", "rustls-native-certs", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-stream", "tower", "tower-layer", @@ -15919,6 +16185,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "winx" version = "0.36.3" @@ -15942,7 +16218,7 @@ dependencies = [ "futures", "futures-timer", "http-types", - "hyper", + "hyper 0.14.28", "log", "once_cell", "regex", diff --git a/Cargo.toml b/Cargo.toml index f1eb517851f3..965006e6fbd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,7 +105,7 @@ members = [ "src/meta/service", # sqllogictest "tests/sqllogictests", - "src/bendpy", 
+ # "src/bendpy", # sqlsmith "src/tests/sqlsmith", ] @@ -217,6 +217,7 @@ parquet_rs = { package = "parquet", version = "50" } # Crates from risingwavelabs arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } +arrow-udf-python = { package = "arrow-udf-python", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } # Serialization prost = { version = "0.12.1" } diff --git a/src/bendpy/pyproject.toml b/src/bendpy/pyproject.toml index 322bac60e054..99619e7fe8d1 100644 --- a/src/bendpy/pyproject.toml +++ b/src/bendpy/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "databend" -requires-python = ">=3.7" +requires-python = ">=3.12" description = "Databend Python Binding" classifiers = [ "Programming Language :: Rust", diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index cc7482c04be4..37c1686f3ff5 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -105,6 +105,7 @@ jsonb = { workspace = true } # Crates.io dependencies arrow-udf-js = { workspace = true } arrow-udf-wasm = { workspace = true } +arrow-udf-python = { workspace = true } arrow-array = { workspace = true } arrow-flight = { workspace = true } diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 3e3e533097d1..528ca6962c2d 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -40,6 +40,7 @@ use crate::pipelines::processors::Processor; pub enum ScriptRuntime { JavaScript(Arc>), WebAssembly(Arc>), + Python(Arc>), } impl ScriptRuntime { @@ -51,6 +52,11 @@ impl ScriptRuntime { 
ErrorCode::UDFDataError(format!("Cannot create js runtime: {}", err)) }), "wasm" => Self::create_wasm_runtime(code), + "python" => arrow_udf_python::Runtime::new() + .map(|runtime| ScriptRuntime::Python(Arc::new(RwLock::new(runtime)))) + .map_err(|err| { + ErrorCode::UDFDataError(format!("Cannot create python runtime: {}", err)) + }), _ => Err(ErrorCode::from_string(format!( "Invalid {} lang Runtime not supported", lang @@ -89,6 +95,16 @@ impl ScriptRuntime { &func.func_name, ) } + ScriptRuntime::Python(runtime) => { + let mut runtime = runtime.write(); + runtime.add_function_with_handler( + &func.name, + arrow_schema.field(0).data_type().clone(), + arrow_udf_python::CallMode::ReturnNullOnNullInput, + code, + &func.func_name, + ) + } // Ignore the execution for WASM context ScriptRuntime::WebAssembly(_) => Ok(()), }?; @@ -111,6 +127,16 @@ impl ScriptRuntime { )) })? } + + ScriptRuntime::Python(runtime) => { + let runtime = runtime.read(); + runtime.call(&func.name, input_batch).map_err(|err| { + ErrorCode::UDFDataError(format!( + "Python UDF '{}' execution failed: {}", + func.name, err + )) + })? 
+ } ScriptRuntime::WebAssembly(runtime) => { let runtime = runtime.read(); runtime.call(&func.func_name, input_batch).map_err(|err| { diff --git a/src/query/sql/src/planner/binder/udf.rs b/src/query/sql/src/planner/binder/udf.rs index 416b52ea9156..d532830fe3df 100644 --- a/src/query/sql/src/planner/binder/udf.rs +++ b/src/query/sql/src/planner/binder/udf.rs @@ -41,7 +41,8 @@ use crate::Binder; impl Binder { fn is_allowed_language(language: &str) -> bool { - let allowed_languages: HashSet<&str> = ["javascript", "wasm"].iter().cloned().collect(); + let allowed_languages: HashSet<&str> = + ["javascript", "wasm", "python"].iter().cloned().collect(); allowed_languages.contains(&language.to_lowercase().as_str()) } @@ -148,7 +149,7 @@ impl Binder { if !Self::is_allowed_language(language) { return Err(ErrorCode::InvalidArgument(format!( - "Unallowed UDF language '{language}', must be javascript or wasm" + "Unallowed UDF language '{language}', must be python, javascript or wasm" ))); } diff --git a/tests/sqllogictests/suites/udf_native/03_0001_udf_js.test b/tests/sqllogictests/suites/udf_native/03_0001_udf_js.test index 4dfee0c02bb7..8e0e3ca2b9b8 100644 --- a/tests/sqllogictests/suites/udf_native/03_0001_udf_js.test +++ b/tests/sqllogictests/suites/udf_native/03_0001_udf_js.test @@ -1,5 +1,5 @@ statement ok -CREATE FUNCTION gcd_js (INT, INT) RETURNS BIGINT LANGUAGE javascript HANDLER = 'gcd_js' AS $$ +CREATE OR REPLACE FUNCTION gcd_js (INT, INT) RETURNS BIGINT LANGUAGE javascript HANDLER = 'gcd_js' AS $$ export function gcd_js(a, b) { while (b != 0) { let t = b; diff --git a/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test b/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test new file mode 100644 index 000000000000..9b630ff846a6 --- /dev/null +++ b/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test @@ -0,0 +1,15 @@ +statement ok +CREATE OR REPLACE FUNCTION gcd_py (INT, INT) RETURNS BIGINT LANGUAGE python HANDLER = 'gcd' AS $$ +def gcd(a: int, b: int) 
-> int: + while b: + a, b = b, a % b + return a +$$ + +query F +select number, gcd_py(number * 3, number * 6) from numbers(5) where number > 0 order by 1; +---- +1 3 +2 6 +3 9 +4 12 From 4fbc8730380e1140f2097bea750d6d5e3073d7fb Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 12:25:39 +0800 Subject: [PATCH 02/15] feat(query): disable bendpy --- .github/workflows/release.yml | 14 ++++++------- Cargo.toml | 1 + .../transforms/transform_udf_script.rs | 21 ++++++++++--------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 89e6d98aeb98..cdbd31fc56db 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -604,13 +604,13 @@ jobs: with: name: test-sqlsmith - bindings_python: - if: inputs.stable - needs: create_release - uses: ./.github/workflows/bindings.python.yml - secrets: inherit - with: - tag: ${{ needs.create_release.outputs.version }} + # bindings_python: + # if: inputs.stable + # needs: create_release + # uses: ./.github/workflows/bindings.python.yml + # secrets: inherit + # with: + # tag: ${{ needs.create_release.outputs.version }} notify: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index b0724d327ba6..2bc8f1bc99b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -227,6 +227,7 @@ arrow-select = { version = "51" } parquet = { version = "51", features = ["async"] } parquet_rs = { package = "parquet", version = "51" } + # Crates from risingwavelabs arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 528ca6962c2d..c2b718ebd994 100644 --- 
a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -15,6 +15,7 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::sync::Arc; +use std::sync::LazyLock; use arrow_array::RecordBatch; use arrow_schema::Schema; @@ -37,10 +38,14 @@ use crate::pipelines::processors::InputPort; use crate::pipelines::processors::OutputPort; use crate::pipelines::processors::Processor; +/// python runtime should be only initialized once by gil lock, see: https://github.com/python/cpython/blob/main/Python/pystate.c +static GLOBAL_PYTHON_RUNTIME: LazyLock>> = + LazyLock::new(|| Arc::new(RwLock::new(arrow_udf_python::Runtime::new().unwrap()))); + pub enum ScriptRuntime { JavaScript(Arc>), WebAssembly(Arc>), - Python(Arc>), + Python, } impl ScriptRuntime { @@ -52,11 +57,7 @@ impl ScriptRuntime { ErrorCode::UDFDataError(format!("Cannot create js runtime: {}", err)) }), "wasm" => Self::create_wasm_runtime(code), - "python" => arrow_udf_python::Runtime::new() - .map(|runtime| ScriptRuntime::Python(Arc::new(RwLock::new(runtime)))) - .map_err(|err| { - ErrorCode::UDFDataError(format!("Cannot create python runtime: {}", err)) - }), + "python" => Ok(Self::Python), _ => Err(ErrorCode::from_string(format!( "Invalid {} lang Runtime not supported", lang @@ -95,8 +96,8 @@ impl ScriptRuntime { &func.func_name, ) } - ScriptRuntime::Python(runtime) => { - let mut runtime = runtime.write(); + ScriptRuntime::Python => { + let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); runtime.add_function_with_handler( &func.name, arrow_schema.field(0).data_type().clone(), @@ -128,8 +129,8 @@ impl ScriptRuntime { })? 
} - ScriptRuntime::Python(runtime) => { - let runtime = runtime.read(); + ScriptRuntime::Python => { + let runtime = GLOBAL_PYTHON_RUNTIME.read(); runtime.call(&func.name, input_batch).map_err(|err| { ErrorCode::UDFDataError(format!( "Python UDF '{}' execution failed: {}", From e07d92a0aa66daea3686f19f177b433180e99b15 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 13:14:57 +0800 Subject: [PATCH 03/15] feat(query): fmt --- Cargo.toml | 1 - src/query/service/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1a71d31788a6..7e8a85ac2084 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -223,7 +223,6 @@ arrow-select = { version = "51" } parquet = { version = "51", features = ["async"] } parquet_rs = { package = "parquet", version = "51" } - # Crates from risingwavelabs arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index f9becb28507c..0fa097840fcb 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -104,8 +104,8 @@ jsonb = { workspace = true } # Crates.io dependencies arrow-udf-js = { workspace = true } -arrow-udf-wasm = { workspace = true } arrow-udf-python = { workspace = true } +arrow-udf-wasm = { workspace = true } arrow-array = { workspace = true } arrow-flight = { workspace = true } From 2fc0b1fe9d1ca5c8d85544fc33a48b755fa8a945 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 13:18:27 +0800 Subject: [PATCH 04/15] feat(query): fmt --- .github/actions/setup_build_tool/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/setup_build_tool/action.yml b/.github/actions/setup_build_tool/action.yml index 580be26142e6..06c49d366007 100644 --- 
a/.github/actions/setup_build_tool/action.yml +++ b/.github/actions/setup_build_tool/action.yml @@ -28,6 +28,8 @@ runs: cat <$BIN_LOCAL/build-tool #!/bin/bash script_name=\$(basename "\$0") + export PYO3_PYTHON=python3.12 + export PYO3_CROSS_PYTHON_VERSION=python3.12 export INTERACTIVE=false export TARGET=${{ inputs.target }} export CARGO_INCREMENTAL=0 From 4f30b54a12a5e700c710bb865676aa758821ebd7 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 15:28:53 +0800 Subject: [PATCH 05/15] feat(query): fmt --- .github/actions/build_linux/action.yml | 4 ++-- .github/actions/build_linux_sanitizer/action.yml | 2 +- .github/actions/check/action.yml | 2 +- .github/actions/setup_build_tool/action.yml | 2 +- .github/actions/test_unit/action.yml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index 1878382a85d4..d68ce549da4c 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -27,10 +27,10 @@ runs: using: "composite" steps: - name: Setup Build Tool - uses: ./.github/actions/setup_build_tool + uses: ./.github/actions/setup_build_tool with: target: ${{ inputs.target }} - bypass_env_vars: RUSTFLAGS,RUST_LOG + bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,PYO3_CROSS_PYTHON_VERSION - name: Cross setup if: startsWith(inputs.target, 'aarch64-') diff --git a/.github/actions/build_linux_sanitizer/action.yml b/.github/actions/build_linux_sanitizer/action.yml index 615b59104b8b..a5e9b9542d7e 100644 --- a/.github/actions/build_linux_sanitizer/action.yml +++ b/.github/actions/build_linux_sanitizer/action.yml @@ -19,7 +19,7 @@ runs: uses: ./.github/actions/setup_build_tool with: target: ${{ inputs.target }} - bypass_env_vars: RUSTFLAGS,RUST_LOG + bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON - name: Cross setup if: startsWith(inputs.target, 'aarch64-') diff --git a/.github/actions/check/action.yml 
b/.github/actions/check/action.yml index 161e844958e7..c5e270d059bb 100644 --- a/.github/actions/check/action.yml +++ b/.github/actions/check/action.yml @@ -10,7 +10,7 @@ runs: - name: Setup Build Tool uses: ./.github/actions/setup_build_tool with: - bypass_env_vars: RUSTFLAGS,RUST_LOG,GITHUB_TOKEN + bypass_env_vars: RUSTFLAGS,RUST_LOG,GITHUB_TOKEN,PYO3_PYTHON - name: Check Apache License Header uses: korandoru/hawkeye@v2 diff --git a/.github/actions/setup_build_tool/action.yml b/.github/actions/setup_build_tool/action.yml index 06c49d366007..6a9ec6663ae2 100644 --- a/.github/actions/setup_build_tool/action.yml +++ b/.github/actions/setup_build_tool/action.yml @@ -8,7 +8,7 @@ inputs: bypass_env_vars: description: "Environment variables bypass to docker container" required: false - default: RUSTFLAGS,RUST_LOG + default: RUSTFLAGS,RUST_LOG,PYO3_PYTHON runs: using: "composite" steps: diff --git a/.github/actions/test_unit/action.yml b/.github/actions/test_unit/action.yml index adb8fc5719b4..b43a058fda7a 100644 --- a/.github/actions/test_unit/action.yml +++ b/.github/actions/test_unit/action.yml @@ -7,7 +7,7 @@ runs: - name: Setup Build Tool uses: ./.github/actions/setup_build_tool with: - bypass_env_vars: RUSTFLAGS,RUSTDOCFLAGS,RUST_TEST_THREADS,RUST_LOG,RUST_BACKTRACE + bypass_env_vars: RUSTFLAGS,RUSTDOCFLAGS,RUST_TEST_THREADS,RUST_LOG,RUST_BACKTRACE,PYO3_PYTHON - shell: bash run: | From fd65efc3726d6b17ff0175bd40f8897332622a95 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 15:34:14 +0800 Subject: [PATCH 06/15] feat(query): fmt --- .github/actions/build_linux/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index d68ce549da4c..4c9e27adff76 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -27,7 +27,7 @@ runs: using: "composite" steps: - name: Setup Build Tool - uses: 
./.github/actions/setup_build_tool + uses: ./.github/actions/setup_build_tool with: target: ${{ inputs.target }} bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,PYO3_CROSS_PYTHON_VERSION From 6acdbaae12d175b76478b167cffd8d84d9840ec3 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 13 May 2024 16:08:25 +0800 Subject: [PATCH 07/15] feat(query): python 3.12.2 --- scripts/setup/dev_setup.sh | 46 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/scripts/setup/dev_setup.sh b/scripts/setup/dev_setup.sh index 2df005e669cd..30bc30fdef01 100755 --- a/scripts/setup/dev_setup.sh +++ b/scripts/setup/dev_setup.sh @@ -133,32 +133,28 @@ function install_ziglang { function install_python3 { PACKAGE_MANAGER=$1 - echo "==> installing python3..." + echo "==> installing python3 via pyenv..." + curl https://pyenv.run | bash + + # Check if pyenv is already initialized in the profile + if ! command -v pyenv >/dev/null; then + # Add PYENV_ROOT to the profile + echo 'export PYENV_ROOT="$HOME/.pyenv"' >> $HOME/.profile + # Check if pyenv is in the PATH + echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> $HOME/.profile + # Initialize pyenv in the profile + echo 'eval "$(pyenv init -)"' >> ~/.profile + echo "pyenv environment variables have been added to ~/.profile" + else + echo "pyenv is already set in the environment variables." 
+ fi - case "$PACKAGE_MANAGER" in - apt-get) - install_pkg python3-all-dev "$PACKAGE_MANAGER" - install_pkg python3-setuptools "$PACKAGE_MANAGER" - install_pkg python3-pip "$PACKAGE_MANAGER" - install_pkg libcairo2-dev "$PACKAGE_MANAGER" - ;; - apk) - install_pkg python3-dev "$PACKAGE_MANAGER" - install_pkg py3-pip "$PACKAGE_MANAGER" - install_pkg libffi-dev "$PACKAGE_MANAGER" - ;; - brew | pacman) - install_pkg python3 "$PACKAGE_MANAGER" - install_pkg cairo "$PACKAGE_MANAGER" - ;; - yum | dnf) - install_pkg python3-devel "$PACKAGE_MANAGER" - install_pkg cairo-devel "$PACKAGE_MANAGER" - ;; - *) - echo "Unable to install python3 with package manager: $PACKAGE_MANAGER" - exit 1 - ;; + # Source the profile to apply changes to the current session + source $HOME/.profile + + # install python3.12 + pyenv install 3.12.2 + pyenv global 3.12.2 esac } From cc5226cca588d438adaf1893c6b614ca9fa86b9b Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 14 May 2024 18:39:54 +0800 Subject: [PATCH 08/15] update PYO3_CROSS_PYTHON_VERSION --- .github/actions/setup_build_tool/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup_build_tool/action.yml b/.github/actions/setup_build_tool/action.yml index 6a9ec6663ae2..3c60298cf6f0 100644 --- a/.github/actions/setup_build_tool/action.yml +++ b/.github/actions/setup_build_tool/action.yml @@ -29,7 +29,7 @@ runs: #!/bin/bash script_name=\$(basename "\$0") export PYO3_PYTHON=python3.12 - export PYO3_CROSS_PYTHON_VERSION=python3.12 + export PYO3_CROSS_PYTHON_VERSION=3.12 export INTERACTIVE=false export TARGET=${{ inputs.target }} export CARGO_INCREMENTAL=0 From fe876326637c28c751dc0baf20b85df3ea19e8d6 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Tue, 21 May 2024 14:27:44 +0800 Subject: [PATCH 09/15] update --- Cargo.lock | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eddcf4d54dd1..f79b3d3bd9ec 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -1077,31 +1077,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" [[package]] -<<<<<<< HEAD -======= -name = "bendpy" -version = "0.0.0" -dependencies = [ - "arrow", - "arrow-schema", - "ctor 0.2.7", - "databend-common-config", - "databend-common-exception", - "databend-common-expression", - "databend-common-license", - "databend-common-meta-app", - "databend-common-meta-embedded", - "databend-common-users", - "databend-query", - "pyo3", - "pyo3-build-config 0.18.3", - "tokio", - "tokio-stream", - "uuid", -] - -[[package]] ->>>>>>> main name = "bigdecimal" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" From bff7d7b8e09b8981d94aef8aeca00df8a5466e82 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 24 May 2024 22:13:27 +0800 Subject: [PATCH 10/15] merge --- Cargo.lock | 27 ++++--------------- Cargo.toml | 1 - src/binaries/Cargo.toml | 1 + src/common/exception/src/exception_code.rs | 1 + src/query/service/Cargo.toml | 4 +-- .../transforms/transform_udf_script.rs | 22 ++++++++++++--- 6 files changed, 27 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2cc1cfb88a1..f196fcf7a778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -288,21 +288,6 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ -<<<<<<< HEAD - "arrow-arith 51.0.0", - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-cast 51.0.0", - "arrow-csv 51.0.0", - "arrow-data 51.0.0", - "arrow-ipc 51.0.0", - "arrow-json 51.0.0", - "arrow-ord 51.0.0", - "arrow-row 51.0.0", - "arrow-schema 51.0.0", - "arrow-select 51.0.0", - "arrow-string 51.0.0", -======= "arrow-arith", "arrow-array", "arrow-buffer", @@ -316,8 +301,6 @@ dependencies = [ "arrow-schema", "arrow-select", 
"arrow-string", - "pyo3", ->>>>>>> main ] [[package]] @@ -575,13 +558,13 @@ dependencies = [ [[package]] name = "arrow-udf-python" version = "0.1.0" -source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" +source = "git+https://github.com/datafuse-extras/arrow-udf?rev=d0a21f0#d0a21f0fde330a0e5f658a55b58e0405d8372844" dependencies = [ "anyhow", - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-ipc 51.0.0", - "arrow-schema 51.0.0", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", "lazy_static", "pyo3", "pyo3-build-config", diff --git a/Cargo.toml b/Cargo.toml index ce2358bb6cc3..02338848bd6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,7 +229,6 @@ flatbuffers = { version = "23" } # Crates from risingwavelabs arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0" } arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0" } -arrow-udf-python = { package = "arrow-udf-python", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0" } # Serialization prost = { version = "0.12.1" } diff --git a/src/binaries/Cargo.toml b/src/binaries/Cargo.toml index fde65f348b63..6679807b9d56 100644 --- a/src/binaries/Cargo.toml +++ b/src/binaries/Cargo.toml @@ -14,6 +14,7 @@ memory-profiling = [ "databend-query/memory-profiling", "databend-common-base/memory-profiling", ] +ee = ["databend-query/ee"] simd = ["databend-meta/simd", "databend-query/simd"] z3-prove = ["databend-query/z3-prove"] jemalloc = ["databend-common-base/jemalloc"] diff --git a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index 32403c2fb0f1..8fc226fb1628 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -191,6 +191,7 @@ build_exceptions! 
{ /// /// For example: license key is expired LicenseKeyInvalid(1402), + EnterpriseFeatureNotEnable(1403), BackgroundJobAlreadyExists(1501), UnknownBackgroundJob(1502), diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 027da8538d90..9c419c44727e 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -14,7 +14,7 @@ test = true [features] default = ["simd", "z3-prove"] simd = ["databend-common-arrow/simd"] - +ee = ["arrow-udf-python"] z3-prove = ["databend-common-sql/z3-prove"] disable_initial_exec_tls = ["databend-common-base/disable_initial_exec_tls"] @@ -103,7 +103,7 @@ jsonb = { workspace = true } # Crates.io dependencies arrow-udf-js = { workspace = true } -arrow-udf-python = { workspace = true } +arrow-udf-python = { package = "arrow-udf-python", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0", optional = true } arrow-udf-wasm = { workspace = true } arrow-array = { workspace = true } diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 209b91eb3705..7f89d0a6bae3 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -15,7 +15,6 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::sync::Arc; -use std::sync::LazyLock; use arrow_array::RecordBatch; use arrow_schema::Schema; @@ -41,8 +40,9 @@ use crate::pipelines::processors::OutputPort; use crate::pipelines::processors::Processor; /// python runtime should be only initialized once by gil lock, see: https://github.com/python/cpython/blob/main/Python/pystate.c -static GLOBAL_PYTHON_RUNTIME: LazyLock>> = - LazyLock::new(|| Arc::new(RwLock::new(arrow_udf_python::Runtime::new().unwrap()))); +#[cfg(feature = "ee")] +static GLOBAL_PYTHON_RUNTIME: std::sync::LazyLock>> = + 
std::sync::LazyLock::new(|| Arc::new(RwLock::new(arrow_udf_python::Runtime::new().unwrap()))); pub enum ScriptRuntime { JavaScript(Arc>), @@ -108,7 +108,9 @@ impl ScriptRuntime { &func.func_name, ) } + #[cfg(feature = "ee")] ScriptRuntime::Python => { + let code: &str = std::str::from_utf8(code)?; let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); runtime.add_function_with_handler( &func.name, @@ -118,6 +120,12 @@ impl ScriptRuntime { &func.func_name, ) } + #[cfg(not(feature = "ee"))] + ScriptRuntime::Python => { + return Err(ErrorCode::EnterpriseFeatureNotEnable( + "Failed to create python script udf", + )); + } // Ignore the execution for WASM context ScriptRuntime::WebAssembly(_) => Ok(()), }?; @@ -140,7 +148,7 @@ impl ScriptRuntime { )) })? } - + #[cfg(feature = "ee")] ScriptRuntime::Python => { let runtime = GLOBAL_PYTHON_RUNTIME.read(); runtime.call(&func.name, input_batch).map_err(|err| { @@ -150,6 +158,12 @@ impl ScriptRuntime { )) })? } + #[cfg(not(feature = "ee"))] + ScriptRuntime::Python => { + return Err(ErrorCode::EnterpriseFeatureNotEnable( + "Failed to execute python script udf", + )); + } ScriptRuntime::WebAssembly(runtime) => { let runtime = runtime.read(); runtime.call(&func.func_name, input_batch).map_err(|err| { From 76313448c645f312ba51e53b3ef7da8294a5762c Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 24 May 2024 22:27:56 +0800 Subject: [PATCH 11/15] update --- .github/actions/build_linux/action.yml | 10 +++++++--- .github/actions/build_linux_sanitizer/action.yml | 2 +- .github/actions/check/action.yml | 2 +- .github/actions/setup_build_tool/action.yml | 3 ++- .github/actions/test_unit/action.yml | 2 +- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index 4c9e27adff76..e56d0142f3fd 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -18,6 +18,10 @@ inputs: features: description: 
"Features to build" required: false + default: "default,ee" + release_features: + description: "Features to build release" + required: false default: "default" category: description: "Category to upload" @@ -30,7 +34,7 @@ runs: uses: ./.github/actions/setup_build_tool with: target: ${{ inputs.target }} - bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,PYO3_CROSS_PYTHON_VERSION + bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,LD_LIBRARY_PATH,PYO3_CROSS_PYTHON_VERSION - name: Cross setup if: startsWith(inputs.target, 'aarch64-') @@ -93,7 +97,7 @@ runs: artifacts="meta,metactl,query,sqllogictests" for artifact in ${artifacts//,/ }; do echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --release --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.release_features }} --release --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact @@ -104,7 +108,7 @@ runs: artifacts="${{ inputs.artifacts }}" for artifact in ${artifacts//,/ }; do echo "==> building databend-$artifact ..." 
- cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --release --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.release_features }} --release --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact diff --git a/.github/actions/build_linux_sanitizer/action.yml b/.github/actions/build_linux_sanitizer/action.yml index a5e9b9542d7e..87eb8c2a314a 100644 --- a/.github/actions/build_linux_sanitizer/action.yml +++ b/.github/actions/build_linux_sanitizer/action.yml @@ -19,7 +19,7 @@ runs: uses: ./.github/actions/setup_build_tool with: target: ${{ inputs.target }} - bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON + bypass_env_vars: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,LD_LIBRARY_PATH - name: Cross setup if: startsWith(inputs.target, 'aarch64-') diff --git a/.github/actions/check/action.yml b/.github/actions/check/action.yml index c5e270d059bb..dd64ec578709 100644 --- a/.github/actions/check/action.yml +++ b/.github/actions/check/action.yml @@ -10,7 +10,7 @@ runs: - name: Setup Build Tool uses: ./.github/actions/setup_build_tool with: - bypass_env_vars: RUSTFLAGS,RUST_LOG,GITHUB_TOKEN,PYO3_PYTHON + bypass_env_vars: RUSTFLAGS,RUST_LOG,GITHUB_TOKEN,PYO3_PYTHON,LD_LIBRARY_PATH - name: Check Apache License Header uses: korandoru/hawkeye@v2 diff --git a/.github/actions/setup_build_tool/action.yml b/.github/actions/setup_build_tool/action.yml index 3c60298cf6f0..290c35a21398 100644 --- a/.github/actions/setup_build_tool/action.yml +++ b/.github/actions/setup_build_tool/action.yml @@ -8,7 +8,7 @@ inputs: bypass_env_vars: description: "Environment variables bypass to docker container" required: false - default: RUSTFLAGS,RUST_LOG,PYO3_PYTHON + default: RUSTFLAGS,RUST_LOG,PYO3_PYTHON,LD_LIBRARY_PATH runs: using: "composite" steps: @@ -29,6 +29,7 @@ runs: #!/bin/bash 
script_name=\$(basename "\$0") export PYO3_PYTHON=python3.12 + export LD_LIBRARY_PATH=/usr/local/lib export PYO3_CROSS_PYTHON_VERSION=3.12 export INTERACTIVE=false export TARGET=${{ inputs.target }} diff --git a/.github/actions/test_unit/action.yml b/.github/actions/test_unit/action.yml index b43a058fda7a..75860bd6a374 100644 --- a/.github/actions/test_unit/action.yml +++ b/.github/actions/test_unit/action.yml @@ -7,7 +7,7 @@ runs: - name: Setup Build Tool uses: ./.github/actions/setup_build_tool with: - bypass_env_vars: RUSTFLAGS,RUSTDOCFLAGS,RUST_TEST_THREADS,RUST_LOG,RUST_BACKTRACE,PYO3_PYTHON + bypass_env_vars: RUSTFLAGS,RUSTDOCFLAGS,RUST_TEST_THREADS,RUST_LOG,RUST_BACKTRACE,PYO3_PYTHON,LD_LIBRARY_PATH - shell: bash run: | From f5908548d13bd48e1f3362e58a9e915ebcc32130 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 24 May 2024 22:54:52 +0800 Subject: [PATCH 12/15] update --- .github/actions/build_linux/action.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index e56d0142f3fd..c983ecc92264 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -65,6 +65,9 @@ runs: echo "RUSTFLAGS=${flags} -C link-arg=-Wl,--compress-debug-sections=zlib" >> $GITHUB_ENV target=${{ inputs.target }} echo "BUILD_ARCH=${target/-unknown-linux-*}" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=/usr/local/lib" >> $GITHUB_ENV + echo "PYO3_PYTHON=python3.12" >> $GITHUB_ENV + echo "PYO3_CROSS_PYTHON_VERSION=/3.12" >> $GITHUB_ENV # build all binaries for debug - name: Build Debug From 368ef3e5eb4ac4203425b0d3064b4a8f74ffe815 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 24 May 2024 23:00:27 +0800 Subject: [PATCH 13/15] update --- .github/actions/build_linux/action.yml | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/actions/build_linux/action.yml 
b/.github/actions/build_linux/action.yml index c983ecc92264..6d47c50f2c62 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -19,10 +19,6 @@ inputs: description: "Features to build" required: false default: "default,ee" - release_features: - description: "Features to build release" - required: false - default: "default" category: description: "Category to upload" required: false @@ -76,8 +72,12 @@ runs: run: | artifacts="meta,metactl,query,sqllogictests" for artifact in ${artifacts//,/ }; do + features=${{ inputs.features }} + if [ $artifact = 'query' ];then + features="${features},ee" + fi echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features $features --bin databend-$artifact done cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin open-sharing ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-* @@ -88,8 +88,13 @@ runs: run: | artifacts="${{ inputs.artifacts }}" for artifact in ${artifacts//,/ }; do + features=${{ inputs.features }} + if [ $artifact = 'query' ];then + features="${features},ee" + fi + echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features $features --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact @@ -100,7 +105,7 @@ runs: artifacts="meta,metactl,query,sqllogictests" for artifact in ${artifacts//,/ }; do echo "==> building databend-$artifact ..." 
- cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.release_features }} --release --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --release --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact @@ -111,7 +116,7 @@ runs: artifacts="${{ inputs.artifacts }}" for artifact in ${artifacts//,/ }; do echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.release_features }} --release --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --release --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact From 63c2a8d8cc82be08987a46851adc60c853313a69 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 25 May 2024 11:08:50 +0800 Subject: [PATCH 14/15] update --- .github/actions/build_linux/action.yml | 15 ++-------- .github/workflows/bindings.python.yml | 26 +++++++++-------- .../suites/udf_native/03_0001_udf_py.test | 29 ++++++++++--------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml index 6d47c50f2c62..68bb2a22b72e 100644 --- a/.github/actions/build_linux/action.yml +++ b/.github/actions/build_linux/action.yml @@ -18,7 +18,7 @@ inputs: features: description: "Features to build" required: false - default: "default,ee" + default: "default" category: description: "Category to upload" required: false @@ -72,12 +72,8 @@ runs: run: | artifacts="meta,metactl,query,sqllogictests" for artifact in ${artifacts//,/ }; do - features=${{ inputs.features }} - if [ $artifact = 'query' ];then - 
features="${features},ee" - fi echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features $features --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin databend-$artifact done cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin open-sharing ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-* @@ -88,13 +84,8 @@ runs: run: | artifacts="${{ inputs.artifacts }}" for artifact in ${artifacts//,/ }; do - features=${{ inputs.features }} - if [ $artifact = 'query' ];then - features="${features},ee" - fi - echo "==> building databend-$artifact ..." - cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features $features --bin databend-$artifact + cargo -Zgitoxide=fetch -Zgit=shallow-index,shallow-deps build --target ${{ inputs.target }} --features ${{ inputs.features }} --bin databend-$artifact done ls -lh ./target/${{ inputs.target }}/${{ env.BUILD_PROFILE }}/databend-$artifact diff --git a/.github/workflows/bindings.python.yml b/.github/workflows/bindings.python.yml index 1f756d2858e9..440d33a236da 100644 --- a/.github/workflows/bindings.python.yml +++ b/.github/workflows/bindings.python.yml @@ -1,18 +1,20 @@ name: Bindings Python on: - pull_request: - branches: - - main - paths: - - "src/**" - - ".github/workflows/bindings.python.yml" - workflow_call: - inputs: - tag: - description: Tag to release - required: true - type: string + ## uncomment it when bendpy is enabled + workflow_dispatch: + # pull_request: + # branches: + # - main + # paths: + # - "src/**" + # - ".github/workflows/bindings.python.yml" + # workflow_call: + # inputs: + # tag: + # description: Tag to release + # required: true + # type: string concurrency: group: ${{ 
github.workflow }}-${{ github.ref }}-${{ github.event_name }} diff --git a/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test b/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test index 9b630ff846a6..059763c94657 100644 --- a/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test +++ b/tests/sqllogictests/suites/udf_native/03_0001_udf_py.test @@ -1,15 +1,16 @@ -statement ok -CREATE OR REPLACE FUNCTION gcd_py (INT, INT) RETURNS BIGINT LANGUAGE python HANDLER = 'gcd' AS $$ -def gcd(a: int, b: int) -> int: - while b: - a, b = b, a % b - return a -$$ +## enable it when compiled with ee feature +## statement ok +## CREATE OR REPLACE FUNCTION gcd_py (INT, INT) RETURNS BIGINT LANGUAGE python HANDLER = 'gcd' AS $$ +## def gcd(a: int, b: int) -> int: +## while b: +## a, b = b, a % b +## return a +## $$ -query F -select number, gcd_py(number * 3, number * 6) from numbers(5) where number > 0 order by 1; ----- -1 3 -2 6 -3 9 -4 12 +## query F +## select number, gcd_py(number * 3, number * 6) from numbers(5) where number > 0 order by 1; +## ---- +## 1 3 +## 2 6 +## 3 9 +## 4 12 From 41096576b8605155c9f780493670c66c02398e91 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 27 May 2024 09:19:30 +0800 Subject: [PATCH 15/15] update feature name --- src/binaries/Cargo.toml | 2 +- src/query/service/Cargo.toml | 2 +- .../processors/transforms/transform_udf_script.rs | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/binaries/Cargo.toml b/src/binaries/Cargo.toml index 6679807b9d56..416e18cd1807 100644 --- a/src/binaries/Cargo.toml +++ b/src/binaries/Cargo.toml @@ -14,7 +14,7 @@ memory-profiling = [ "databend-query/memory-profiling", "databend-common-base/memory-profiling", ] -ee = ["databend-query/ee"] +python-udf = ["databend-query/python-udf"] simd = ["databend-meta/simd", "databend-query/simd"] z3-prove = ["databend-query/z3-prove"] jemalloc = ["databend-common-base/jemalloc"] diff --git 
a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 9c419c44727e..ca9ec64a5cbb 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -14,7 +14,7 @@ test = true [features] default = ["simd", "z3-prove"] simd = ["databend-common-arrow/simd"] -ee = ["arrow-udf-python"] +python-udf = ["arrow-udf-python"] z3-prove = ["databend-common-sql/z3-prove"] disable_initial_exec_tls = ["databend-common-base/disable_initial_exec_tls"] diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 7f89d0a6bae3..8d2e2b6fe513 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -40,7 +40,7 @@ use crate::pipelines::processors::OutputPort; use crate::pipelines::processors::Processor; /// python runtime should be only initialized once by gil lock, see: https://github.com/python/cpython/blob/main/Python/pystate.c -#[cfg(feature = "ee")] +#[cfg(feature = "python-udf")] static GLOBAL_PYTHON_RUNTIME: std::sync::LazyLock<Arc<RwLock<arrow_udf_python::Runtime>>> = std::sync::LazyLock::new(|| Arc::new(RwLock::new(arrow_udf_python::Runtime::new().unwrap()))); @@ -108,7 +108,7 @@ impl ScriptRuntime { &func.func_name, ) } - #[cfg(feature = "ee")] + #[cfg(feature = "python-udf")] ScriptRuntime::Python => { let code: &str = std::str::from_utf8(code)?; let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); @@ -120,7 +120,7 @@ impl ScriptRuntime { &func.func_name, ) } - #[cfg(not(feature = "ee"))] + #[cfg(not(feature = "python-udf"))] ScriptRuntime::Python => { return Err(ErrorCode::EnterpriseFeatureNotEnable( "Failed to create python script udf", @@ -148,7 +148,7 @@ impl ScriptRuntime { )) })?
} - #[cfg(feature = "ee")] + #[cfg(feature = "python-udf")] ScriptRuntime::Python => { let runtime = GLOBAL_PYTHON_RUNTIME.read(); runtime.call(&func.name, input_batch).map_err(|err| { @@ -158,7 +158,7 @@ impl ScriptRuntime { )) })? } - #[cfg(not(feature = "ee"))] + #[cfg(not(feature = "python-udf"))] ScriptRuntime::Python => { return Err(ErrorCode::EnterpriseFeatureNotEnable( "Failed to execute python script udf",