diff --git a/.github/actions/setup-spark-builder/action.yaml b/.github/actions/setup-spark-builder/action.yaml index 10cdbff59..8ea6a7d9a 100644 --- a/.github/actions/setup-spark-builder/action.yaml +++ b/.github/actions/setup-spark-builder/action.yaml @@ -29,7 +29,7 @@ inputs: comet-version: description: 'The Comet version to use for Spark' required: true - default: '0.1.0-SNAPSHOT' + default: '0.2.0-SNAPSHOT' runs: using: "composite" steps: diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 1cc6a1ff4..b8502fc01 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -71,7 +71,7 @@ jobs: with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} - comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml + comet-version: '0.2.0-SNAPSHOT' # TODO: get this from pom.xml - name: Run Spark tests run: | cd apache-spark diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml index 34a393115..8dda98f04 100644 --- a/.github/workflows/spark_sql_test_ansi.yml +++ b/.github/workflows/spark_sql_test_ansi.yml @@ -69,7 +69,7 @@ jobs: with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} - comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml + comet-version: '0.2.0-SNAPSHOT' # TODO: get this from pom.xml - name: Run Spark tests run: | cd apache-spark diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..4f39d1440 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ + + +# Apache DataFusion Comet Changelog + +Comprehensive changelogs for each release are available [here](dev/changelog). \ No newline at end of file diff --git a/common/pom.xml b/common/pom.xml index b912e8bd5..37eac9c91 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -26,7 +26,7 @@ under the License. 
org.apache.comet comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.1.0-SNAPSHOT + 0.2.0-SNAPSHOT ../pom.xml diff --git a/dev/changelog/0.1.0.md b/dev/changelog/0.1.0.md new file mode 100644 index 000000000..92550c33c --- /dev/null +++ b/dev/changelog/0.1.0.md @@ -0,0 +1,425 @@ + + +# DataFusion Comet 0.1.0 Changelog + +This release consists of 343 commits from 41 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: Add native shuffle and columnar shuffle [#30](https://github.com/apache/datafusion-comet/pull/30) (viirya) +- feat: Support Emit::First for SumDecimalGroupsAccumulator [#47](https://github.com/apache/datafusion-comet/pull/47) (viirya) +- feat: Nested map support for columnar shuffle [#51](https://github.com/apache/datafusion-comet/pull/51) (viirya) +- feat: Support Count(Distinct) and similar aggregation functions [#42](https://github.com/apache/datafusion-comet/pull/42) (huaxingao) +- feat: Upgrade to `jni-rs` 0.21 [#50](https://github.com/apache/datafusion-comet/pull/50) (sunchao) +- feat: Handle exception thrown from native side [#61](https://github.com/apache/datafusion-comet/pull/61) (sunchao) +- feat: Support InSet expression in Comet [#59](https://github.com/apache/datafusion-comet/pull/59) (viirya) +- feat: Add `CometNativeException` for exceptions thrown from the native side [#62](https://github.com/apache/datafusion-comet/pull/62) (sunchao) +- feat: Add cause to native exception [#63](https://github.com/apache/datafusion-comet/pull/63) (viirya) +- feat: Pull based native execution [#69](https://github.com/apache/datafusion-comet/pull/69) (viirya) +- feat: Add executeColumnarCollectIterator to CometExec to collect Comet operator result [#71](https://github.com/apache/datafusion-comet/pull/71) (viirya) +- feat: Add CometBroadcastExchangeExec to support broadcasting the result of Comet native operator [#80](https://github.com/apache/datafusion-comet/pull/80) 
(viirya) +- feat: Reduce memory consumption when writing sorted shuffle files [#82](https://github.com/apache/datafusion-comet/pull/82) (sunchao) +- feat: Add struct/map as unsupported map key/value for columnar shuffle [#84](https://github.com/apache/datafusion-comet/pull/84) (viirya) +- feat: Support multiple input sources for CometNativeExec [#87](https://github.com/apache/datafusion-comet/pull/87) (viirya) +- feat: Date and timestamp trunc with format array [#94](https://github.com/apache/datafusion-comet/pull/94) (parthchandra) +- feat: Support `First`/`Last` aggregate functions [#97](https://github.com/apache/datafusion-comet/pull/97) (huaxingao) +- feat: Add support of TakeOrderedAndProjectExec in Comet [#88](https://github.com/apache/datafusion-comet/pull/88) (viirya) +- feat: Support Binary in shuffle writer [#106](https://github.com/apache/datafusion-comet/pull/106) (advancedxy) +- feat: Add license header by spotless:apply automatically [#110](https://github.com/apache/datafusion-comet/pull/110) (advancedxy) +- feat: Add dictionary binary to shuffle writer [#111](https://github.com/apache/datafusion-comet/pull/111) (viirya) +- feat: Minimize number of connections used by parallel reader [#126](https://github.com/apache/datafusion-comet/pull/126) (parthchandra) +- feat: Support CollectLimit operator [#100](https://github.com/apache/datafusion-comet/pull/100) (advancedxy) +- feat: Enable min/max for boolean type [#165](https://github.com/apache/datafusion-comet/pull/165) (huaxingao) +- feat: Introduce `CometTaskMemoryManager` and native side memory pool [#83](https://github.com/apache/datafusion-comet/pull/83) (sunchao) +- feat: Fix old style names [#201](https://github.com/apache/datafusion-comet/pull/201) (comphead) +- feat: enable comet shuffle manager for comet shell [#204](https://github.com/apache/datafusion-comet/pull/204) (zuston) +- feat: Support bitwise aggregate functions [#197](https://github.com/apache/datafusion-comet/pull/197) (huaxingao) +- 
feat: Support BloomFilterMightContain expr [#179](https://github.com/apache/datafusion-comet/pull/179) (advancedxy) +- feat: Support sort merge join [#178](https://github.com/apache/datafusion-comet/pull/178) (viirya) +- feat: Support HashJoin operator [#194](https://github.com/apache/datafusion-comet/pull/194) (viirya) +- feat: Remove use of nightly int_roundings feature [#228](https://github.com/apache/datafusion-comet/pull/228) (psvri) +- feat: Support Broadcast HashJoin [#211](https://github.com/apache/datafusion-comet/pull/211) (viirya) +- feat: Enable Comet broadcast by default [#213](https://github.com/apache/datafusion-comet/pull/213) (viirya) +- feat: Add CometRowToColumnar operator [#206](https://github.com/apache/datafusion-comet/pull/206) (advancedxy) +- feat: Document the class path / classloader issue with the shuffle manager [#256](https://github.com/apache/datafusion-comet/pull/256) (holdenk) +- feat: Port Datafusion Covariance to Comet [#234](https://github.com/apache/datafusion-comet/pull/234) (huaxingao) +- feat: Add manual test to calculate spark builtin functions coverage [#263](https://github.com/apache/datafusion-comet/pull/263) (comphead) +- feat: Support ANSI mode in CAST from String to Bool [#290](https://github.com/apache/datafusion-comet/pull/290) (andygrove) +- feat: Add extended explain info to Comet plan [#255](https://github.com/apache/datafusion-comet/pull/255) (parthchandra) +- feat: Improve CometSortMergeJoin statistics [#304](https://github.com/apache/datafusion-comet/pull/304) (planga82) +- feat: Add compatibility guide [#316](https://github.com/apache/datafusion-comet/pull/316) (andygrove) +- feat: Improve CometHashJoin statistics [#309](https://github.com/apache/datafusion-comet/pull/309) (planga82) +- feat: Support Variance [#297](https://github.com/apache/datafusion-comet/pull/297) (huaxingao) +- feat: Support murmur3_hash and sha2 family hash functions [#226](https://github.com/apache/datafusion-comet/pull/226) (advancedxy) 
+- feat: Disable cast string to timestamp by default [#337](https://github.com/apache/datafusion-comet/pull/337) (andygrove) +- feat: Improve CometBroadcastHashJoin statistics [#339](https://github.com/apache/datafusion-comet/pull/339) (planga82) +- feat: Implement Spark-compatible CAST from string to integral types [#307](https://github.com/apache/datafusion-comet/pull/307) (andygrove) +- feat: Implement Spark-compatible CAST from string to timestamp types [#335](https://github.com/apache/datafusion-comet/pull/335) (vaibhawvipul) +- feat: Implement Spark-compatible CAST float/double to string [#346](https://github.com/apache/datafusion-comet/pull/346) (mattharder91) +- feat: Only allow incompatible cast expressions to run in comet if a config is enabled [#362](https://github.com/apache/datafusion-comet/pull/362) (andygrove) +- feat: Implement Spark-compatible CAST between integer types [#340](https://github.com/apache/datafusion-comet/pull/340) (ganeshkumar269) +- feat: Supports Stddev [#348](https://github.com/apache/datafusion-comet/pull/348) (huaxingao) +- feat: Improve cast compatibility tests and docs [#379](https://github.com/apache/datafusion-comet/pull/379) (andygrove) +- feat: Implement Spark-compatible CAST from non-integral numeric types to integral types [#399](https://github.com/apache/datafusion-comet/pull/399) (rohitrastogi) +- feat: Implement Spark unhex [#342](https://github.com/apache/datafusion-comet/pull/342) (tshauck) +- feat: Enable columnar shuffle by default [#250](https://github.com/apache/datafusion-comet/pull/250) (viirya) +- feat: Implement Spark-compatible CAST from floating-point/double to decimal [#384](https://github.com/apache/datafusion-comet/pull/384) (vaibhawvipul) +- feat: Add logging to explain reasons for Comet not being able to run a query stage natively [#397](https://github.com/apache/datafusion-comet/pull/397) (andygrove) +- feat: Add support for TryCast expression in Spark 3.2 and 3.3 
[#416](https://github.com/apache/datafusion-comet/pull/416) (vaibhawvipul) +- feat: Supports UUID column [#395](https://github.com/apache/datafusion-comet/pull/395) (huaxingao) +- feat: correlation support [#456](https://github.com/apache/datafusion-comet/pull/456) (huaxingao) +- feat: Implement Spark-compatible CAST from String to Date [#383](https://github.com/apache/datafusion-comet/pull/383) (vidyasankarv) +- feat: Add COMET_SHUFFLE_MODE config to control Comet shuffle mode [#460](https://github.com/apache/datafusion-comet/pull/460) (viirya) +- feat: Add random row generator in data generator [#451](https://github.com/apache/datafusion-comet/pull/451) (advancedxy) +- feat: Add xxhash64 function support [#424](https://github.com/apache/datafusion-comet/pull/424) (advancedxy) +- feat: add hex scalar function [#449](https://github.com/apache/datafusion-comet/pull/449) (tshauck) +- feat: Add "Comet Fuzz" fuzz-testing utility [#472](https://github.com/apache/datafusion-comet/pull/472) (andygrove) +- feat: Use enum to represent CAST eval_mode in expr.proto [#415](https://github.com/apache/datafusion-comet/pull/415) (prashantksharma) +- feat: Implement ANSI support for UnaryMinus [#471](https://github.com/apache/datafusion-comet/pull/471) (vaibhawvipul) +- feat: Add specific fuzz tests for cast and try_cast and fix NPE found during fuzz testing [#514](https://github.com/apache/datafusion-comet/pull/514) (andygrove) +- feat: Add fuzz testing for arithmetic expressions [#519](https://github.com/apache/datafusion-comet/pull/519) (andygrove) +- feat: Add HashJoin support for BuildRight [#437](https://github.com/apache/datafusion-comet/pull/437) (viirya) +- feat: Fix Comet error message [#544](https://github.com/apache/datafusion-comet/pull/544) (comphead) +- feat: Support Ansi mode in abs function [#500](https://github.com/apache/datafusion-comet/pull/500) (planga82) +- feat: Enable xxhash64 by default [#583](https://github.com/apache/datafusion-comet/pull/583) 
(andygrove) +- feat: Add experimental support for Apache Spark 3.5.1 [#587](https://github.com/apache/datafusion-comet/pull/587) (andygrove) +- feat: add nullOnDivideByZero for Covariance [#564](https://github.com/apache/datafusion-comet/pull/564) (huaxingao) +- feat: Implement more efficient version of xxhash64 [#575](https://github.com/apache/datafusion-comet/pull/575) (andygrove) +- feat: Enable Spark SQL tests for Spark 3.5.1 [#603](https://github.com/apache/datafusion-comet/pull/603) (andygrove) +- feat: Initial support for Window function [#599](https://github.com/apache/datafusion-comet/pull/599) (huaxingao) +- feat: IsNaN expression in Comet [#612](https://github.com/apache/datafusion-comet/pull/612) (eejbyfeldt) +- feat: Add support for CreateNamedStruct [#620](https://github.com/apache/datafusion-comet/pull/620) (eejbyfeldt) +- feat: add cargo machete to remove udeps [#641](https://github.com/apache/datafusion-comet/pull/641) (vaibhawvipul) +- feat: Upgrade to DataFusion 40.0.0-rc1 [#644](https://github.com/apache/datafusion-comet/pull/644) (andygrove) +- feat: Use unified allocator for execution iterators [#613](https://github.com/apache/datafusion-comet/pull/613) (viirya) +- feat: Create new `datafusion-comet-spark-expr` crate containing Spark-compatible DataFusion expressions [#638](https://github.com/apache/datafusion-comet/pull/638) (andygrove) +- feat: Move `IfExpr` to `spark-expr` crate [#653](https://github.com/apache/datafusion-comet/pull/653) (andygrove) +- feat: Upgrade to DataFusion 40 [#657](https://github.com/apache/datafusion-comet/pull/657) (andygrove) +- feat: Show user a more intuitive message when queries fall back to Spark [#656](https://github.com/apache/datafusion-comet/pull/656) (andygrove) +- feat: Enable remaining Spark 3.5.1 tests [#676](https://github.com/apache/datafusion-comet/pull/676) (andygrove) +- feat: Spark-4.0 widening type support [#604](https://github.com/apache/datafusion-comet/pull/604) (kazuyukitanimura) +- feat: 
add scalar subquery pushdown to scan [#678](https://github.com/apache/datafusion-comet/pull/678) (parthchandra) + +**Fixed bugs:** + +- fix: Comet sink operator should not have children operators [#26](https://github.com/apache/datafusion-comet/pull/26) (viirya) +- fix: Fix the UnionExec match branches in CometExecRule [#68](https://github.com/apache/datafusion-comet/pull/68) (wankunde) +- fix: Appending null values to element array builders of StructBuilder for null row in a StructArray [#78](https://github.com/apache/datafusion-comet/pull/78) (viirya) +- fix: Fix compilation error for CometBroadcastExchangeExec [#86](https://github.com/apache/datafusion-comet/pull/86) (viirya) +- fix: Avoid exception caused by broadcasting empty result [#92](https://github.com/apache/datafusion-comet/pull/92) (wForget) +- fix: Add num_rows when building RecordBatch [#103](https://github.com/apache/datafusion-comet/pull/103) (advancedxy) +- fix: Cast string to boolean not compatible with Spark [#107](https://github.com/apache/datafusion-comet/pull/107) (erenavsarogullari) +- fix: Another attempt to fix libcrypto.dylib loading issue [#112](https://github.com/apache/datafusion-comet/pull/112) (advancedxy) +- fix: Fix compilation error for Spark 3.2 & 3.3 [#117](https://github.com/apache/datafusion-comet/pull/117) (sunchao) +- fix: Fix corrupted AggregateMode when transforming plan parameters [#118](https://github.com/apache/datafusion-comet/pull/118) (viirya) +- fix: bitwise shift with different left/right types [#135](https://github.com/apache/datafusion-comet/pull/135) (viirya) +- fix: Avoid null exception in removeSubquery [#147](https://github.com/apache/datafusion-comet/pull/147) (viirya) +- fix: rat check error in vscode ide [#161](https://github.com/apache/datafusion-comet/pull/161) (thexiay) +- fix: Final aggregation should not bind to the input of partial aggregation [#155](https://github.com/apache/datafusion-comet/pull/155) (viirya) +- fix: coalesce should return correct 
datatype [#168](https://github.com/apache/datafusion-comet/pull/168) (viirya) +- fix: attempt to divide by zero error on decimal division [#172](https://github.com/apache/datafusion-comet/pull/172) (viirya) +- fix: Aggregation without aggregation expressions should use correct result expressions [#175](https://github.com/apache/datafusion-comet/pull/175) (viirya) +- fix: Comet native operator can be executed after ReusedExchange [#187](https://github.com/apache/datafusion-comet/pull/187) (viirya) +- fix: Try to convert a static list into a set in Rust [#184](https://github.com/apache/datafusion-comet/pull/184) (advancedxy) +- fix: Include active spiller when computing peak shuffle memory [#196](https://github.com/apache/datafusion-comet/pull/196) (sunchao) +- fix: CometExecRule should handle ShuffleQueryStage and ReusedExchange [#186](https://github.com/apache/datafusion-comet/pull/186) (viirya) +- fix: Use `makeCopy` to change relation in `FileSourceScanExec` [#207](https://github.com/apache/datafusion-comet/pull/207) (viirya) +- fix: Remove duplicate byte array allocation for CometDictionary [#224](https://github.com/apache/datafusion-comet/pull/224) (viirya) +- fix: Remove redundant data copy in columnar shuffle [#233](https://github.com/apache/datafusion-comet/pull/233) (viirya) +- fix: Only maps FIXED_LEN_BYTE_ARRAY to String for uuid type [#238](https://github.com/apache/datafusion-comet/pull/238) (huaxingao) +- fix: Reduce RowPartition memory allocation [#244](https://github.com/apache/datafusion-comet/pull/244) (viirya) +- fix: Remove wrong calculation for Murmur3Hash for float with null input [#245](https://github.com/apache/datafusion-comet/pull/245) (advancedxy) +- fix: Deallocate row addresses and size arrays after exporting [#246](https://github.com/apache/datafusion-comet/pull/246) (viirya) +- fix: Fix wrong children expression order in IfExpr [#249](https://github.com/apache/datafusion-comet/pull/249) (viirya) +- fix: Average expression in Comet 
Final should handle all null inputs from partial Spark aggregation [#261](https://github.com/apache/datafusion-comet/pull/261) (viirya) +- fix: Only trigger Comet Final aggregation on Comet partial aggregation [#264](https://github.com/apache/datafusion-comet/pull/264) (viirya) +- fix: incorrect result on Comet multiple column distinct count [#268](https://github.com/apache/datafusion-comet/pull/268) (viirya) +- fix: Avoid using CometConf [#266](https://github.com/apache/datafusion-comet/pull/266) (snmvaughan) +- fix: Fix arrow error when sorting on empty batch [#271](https://github.com/apache/datafusion-comet/pull/271) (viirya) +- fix: Include license using `#` instead of using XML comment [#274](https://github.com/apache/datafusion-comet/pull/274) (snmvaughan) +- fix: Comet should not translate try_sum to native sum expression [#277](https://github.com/apache/datafusion-comet/pull/277) (viirya) +- fix: incorrect result with aggregate expression with filter [#284](https://github.com/apache/datafusion-comet/pull/284) (viirya) +- fix: Comet should not fail on negative limit parameter [#288](https://github.com/apache/datafusion-comet/pull/288) (viirya) +- fix: Comet columnar shuffle should not be on top of another Comet shuffle operator [#296](https://github.com/apache/datafusion-comet/pull/296) (viirya) +- fix: Iceberg scan transition should be in front of other data source v2 [#302](https://github.com/apache/datafusion-comet/pull/302) (viirya) +- fix: CometExec's outputPartitioning might not be same as Spark expects after AQE interferes [#299](https://github.com/apache/datafusion-comet/pull/299) (viirya) +- fix: CometShuffleExchangeExec logical link should be correct [#324](https://github.com/apache/datafusion-comet/pull/324) (viirya) +- fix: SortMergeJoin with unsupported key type should fall back to Spark [#355](https://github.com/apache/datafusion-comet/pull/355) (viirya) +- fix: limit with offset should return correct results 
[#359](https://github.com/apache/datafusion-comet/pull/359) (viirya) +- fix: Disable Comet shuffle with AQE coalesce partitions enabled [#380](https://github.com/apache/datafusion-comet/pull/380) (viirya) +- fix: Unknown operator id when explain with formatted mode [#410](https://github.com/apache/datafusion-comet/pull/410) (leoluan2009) +- fix: Reuse CometBroadcastExchangeExec with Spark ReuseExchangeAndSubquery rule [#441](https://github.com/apache/datafusion-comet/pull/441) (viirya) +- fix: newFileScanRDD should not take constructor from custom Spark versions [#412](https://github.com/apache/datafusion-comet/pull/412) (ceppelli) +- fix: fix CometNativeExec.doCanonicalize for ReusedExchangeExec [#447](https://github.com/apache/datafusion-comet/pull/447) (viirya) +- fix: Enable cast string to int tests and fix compatibility issue [#453](https://github.com/apache/datafusion-comet/pull/453) (andygrove) +- fix: Compute murmur3 hash with dictionary input correctly [#433](https://github.com/apache/datafusion-comet/pull/433) (advancedxy) +- fix: Only delegate to DataFusion cast when we know that it is compatible with Spark [#461](https://github.com/apache/datafusion-comet/pull/461) (andygrove) +- fix: `ColumnReader.loadVector` should initiate `CometDictionary` after re-import arrays [#473](https://github.com/apache/datafusion-comet/pull/473) (viirya) +- fix: substring with negative indices should produce correct result [#470](https://github.com/apache/datafusion-comet/pull/470) (sonhmai) +- fix: CometReader.loadVector should not overwrite dictionary ids [#476](https://github.com/apache/datafusion-comet/pull/476) (viirya) +- fix: Reuse previous CometDictionary Java arrays [#489](https://github.com/apache/datafusion-comet/pull/489) (viirya) +- fix: Fallback to Spark for LIKE with custom escape character [#478](https://github.com/apache/datafusion-comet/pull/478) (sujithjay) +- fix: Incorrect input schema when preparing result expressions for HashAggregation 
[#501](https://github.com/apache/datafusion-comet/pull/501) (viirya) +- fix: Input batch to ShuffleRepartitioner.insert_batch should not be larger than configured batch size [#523](https://github.com/apache/datafusion-comet/pull/523) (viirya) +- fix: Fix integer overflow in date_parser [#529](https://github.com/apache/datafusion-comet/pull/529) (eejbyfeldt) +- fix: null character not permitted in chr function [#513](https://github.com/apache/datafusion-comet/pull/513) (vaibhawvipul) +- fix: Overflow when reading Timestamp from parquet file [#542](https://github.com/apache/datafusion-comet/pull/542) (eejbyfeldt) +- fix: Re-implement some Parquet decode methods without `copy_nonoverlapping` [#558](https://github.com/apache/datafusion-comet/pull/558) (andygrove) +- fix: requested character too large for encoding in chr function [#552](https://github.com/apache/datafusion-comet/pull/552) (vaibhawvipul) +- fix: Running cargo build always triggers rebuild [#579](https://github.com/apache/datafusion-comet/pull/579) (eejbyfeldt) +- fix: Avoid recursive call to `canonicalizePlans` [#582](https://github.com/apache/datafusion-comet/pull/582) (viirya) +- fix: Return error in pre_timestamp_cast instead of panic [#543](https://github.com/apache/datafusion-comet/pull/543) (eejbyfeldt) +- perf: Add criterion benchmark for xxhash64 function [#560](https://github.com/apache/datafusion-comet/pull/560) (andygrove) +- fix: Fix range out of index error with a temporary workaround [#584](https://github.com/apache/datafusion-comet/pull/584) (viirya) +- fix: Improve error "BroadcastExchange is not supported" [#577](https://github.com/apache/datafusion-comet/pull/577) (parthchandra) +- fix: Avoid creating huge duplicate of canonicalized plans for CometNativeExec [#639](https://github.com/apache/datafusion-comet/pull/639) (viirya) +- fix: Tag ignored tests that require SubqueryBroadcastExec [#647](https://github.com/apache/datafusion-comet/pull/647) (parthchandra) +- fix: Optimize some 
functions to rewrite dictionary-encoded strings [#627](https://github.com/apache/datafusion-comet/pull/627) (vaibhawvipul) +- fix: Remove nightly flag in release-nogit target in Makefile [#667](https://github.com/apache/datafusion-comet/pull/667) (andygrove) +- fix: change the not exists base image apache/spark:3.4.3 to 3.4.2 [#686](https://github.com/apache/datafusion-comet/pull/686) (haoxins) +- fix: Spark 4.0 SparkArithmeticException test [#688](https://github.com/apache/datafusion-comet/pull/688) (kazuyukitanimura) +- fix: address failure caused by method signature change in SPARK-48791 [#693](https://github.com/apache/datafusion-comet/pull/693) (parthchandra) + +**Documentation updates:** + +- doc: Add Quickstart Comet doc section [#125](https://github.com/apache/datafusion-comet/pull/125) (comphead) +- doc: Minor fix Getting started reformatting [#128](https://github.com/apache/datafusion-comet/pull/128) (comphead) +- doc: Add initial doc how to expand Comet exceptions [#170](https://github.com/apache/datafusion-comet/pull/170) (comphead) +- doc: Update README.md with shuffle configs [#208](https://github.com/apache/datafusion-comet/pull/208) (viirya) +- doc: Update supported expressions [#237](https://github.com/apache/datafusion-comet/pull/237) (viirya) +- doc: Fix a small typo in README.md [#272](https://github.com/apache/datafusion-comet/pull/272) (rz-vastdata) +- doc: Update DataFusion project name and url [#300](https://github.com/apache/datafusion-comet/pull/300) (viirya) +- docs: Move existing documentation into new Contributor Guide and add Getting Started section [#334](https://github.com/apache/datafusion-comet/pull/334) (andygrove) +- docs: Add more content to the user guide [#347](https://github.com/apache/datafusion-comet/pull/347) (andygrove) +- docs: Generate configuration guide in mvn build [#349](https://github.com/apache/datafusion-comet/pull/349) (andygrove) +- docs: Add a plugin overview page to the contributors guide 
[#345](https://github.com/apache/datafusion-comet/pull/345) (andygrove) +- doc: Fix target typo in development.md [#364](https://github.com/apache/datafusion-comet/pull/364) (jc4x4) +- doc: Clean up supported JDKs in README [#366](https://github.com/apache/datafusion-comet/pull/366) (edmondop) +- doc: add contributing in README.md [#382](https://github.com/apache/datafusion-comet/pull/382) (caicancai) +- docs: fix the docs url of installation instructions [#393](https://github.com/apache/datafusion-comet/pull/393) (haoxins) +- docs: Running ScalaTest suites from the CLI [#404](https://github.com/apache/datafusion-comet/pull/404) (edmondop) +- docs: Remove spark.comet.exec.broadcast.enabled from config docs [#421](https://github.com/apache/datafusion-comet/pull/421) (andygrove) +- docs: fix various sphinx warnings [#428](https://github.com/apache/datafusion-comet/pull/428) (tshauck) +- doc: Add Plan Stability Testing to development guide [#432](https://github.com/apache/datafusion-comet/pull/432) (viirya) +- docs: Update Spark shell command to include setting additional class path [#435](https://github.com/apache/datafusion-comet/pull/435) (andygrove) +- doc: Add Tuning Guide with shuffle configs [#443](https://github.com/apache/datafusion-comet/pull/443) (viirya) +- docs: Add benchmarking guide [#444](https://github.com/apache/datafusion-comet/pull/444) (andygrove) +- docs: add guide to adding a new expression [#422](https://github.com/apache/datafusion-comet/pull/422) (tshauck) +- docs: changes in documentation [#512](https://github.com/apache/datafusion-comet/pull/512) (SemyonSinchenko) +- docs: Improve user documentation for supported operators and expressions [#520](https://github.com/apache/datafusion-comet/pull/520) (andygrove) +- docs: Proposal for source release process [#556](https://github.com/apache/datafusion-comet/pull/556) (andygrove) +- docs: Update benchmark results [#687](https://github.com/apache/datafusion-comet/pull/687) (andygrove) +- docs: 
Update percentage speedups in benchmarking guide [#691](https://github.com/apache/datafusion-comet/pull/691) (andygrove) +- doc: Add memory tuning section to user guide [#684](https://github.com/apache/datafusion-comet/pull/684) (viirya) + +**Other:** + +- Initial PR [#1](https://github.com/apache/datafusion-comet/pull/1) (sunchao) +- build: Add Maven wrapper to the project [#13](https://github.com/apache/datafusion-comet/pull/13) (sunchao) +- build: Add basic CI test pipelines [#18](https://github.com/apache/datafusion-comet/pull/18) (sunchao) +- Bump com.google.protobuf:protobuf-java from 3.17.3 to 3.19.6 [#5](https://github.com/apache/datafusion-comet/pull/5) (dependabot[bot]) +- build: Add PR template [#23](https://github.com/apache/datafusion-comet/pull/23) (sunchao) +- build: Create ticket templates [#24](https://github.com/apache/datafusion-comet/pull/24) (comphead) +- build: Re-enable Scala style checker and spotless [#21](https://github.com/apache/datafusion-comet/pull/21) (sunchao) +- build: Remove license header from pull request template [#28](https://github.com/apache/datafusion-comet/pull/28) (viirya) +- build: Exclude .github from apache-rat-plugin check [#32](https://github.com/apache/datafusion-comet/pull/32) (viirya) +- build: Add CI for MacOS (x64 and aarch64) [#35](https://github.com/apache/datafusion-comet/pull/35) (sunchao) +- fix broken link in README.md [#39](https://github.com/apache/datafusion-comet/pull/39) (nairbv) +- test: Add some fuzz testing for cast operations [#16](https://github.com/apache/datafusion-comet/pull/16) (andygrove) +- test: Fix CI failure on libcrypto [#41](https://github.com/apache/datafusion-comet/pull/41) (sunchao) +- test: Reduce test time spent in `CometShuffleSuite` [#40](https://github.com/apache/datafusion-comet/pull/40) (sunchao) +- test: Add test for RoundRobinPartitioning [#54](https://github.com/apache/datafusion-comet/pull/54) (viirya) +- build: Fix potential libcrypto lib loading issue for X86 mac runners 
[#55](https://github.com/apache/datafusion-comet/pull/55) (advancedxy) +- refactor: Remove a few duplicated occurrences [#53](https://github.com/apache/datafusion-comet/pull/53) (sunchao) +- build: Fix mvn cache for containerized runners [#48](https://github.com/apache/datafusion-comet/pull/48) (advancedxy) +- test: Ensure traversed operators during finding first partial aggregation are all native [#58](https://github.com/apache/datafusion-comet/pull/58) (viirya) +- build: Upgrade arrow-rs to 50.0.0 and DataFusion to 35.0.0 [#65](https://github.com/apache/datafusion-comet/pull/65) (viirya) +- build: Support built with java 1.8 [#45](https://github.com/apache/datafusion-comet/pull/45) (advancedxy) +- test: Add golden files for TPCDSPlanStabilitySuite [#73](https://github.com/apache/datafusion-comet/pull/73) (sunchao) +- test: Add TPC-DS test results [#77](https://github.com/apache/datafusion-comet/pull/77) (sunchao) +- build: Upgrade spotless version to 2.43.0 [#85](https://github.com/apache/datafusion-comet/pull/85) (viirya) +- test: Expose thrown exception when executing query in CometTPCHQuerySuite [#96](https://github.com/apache/datafusion-comet/pull/96) (viirya) +- test: Enable TPCDS q41 in CometTPCDSQuerySuite [#98](https://github.com/apache/datafusion-comet/pull/98) (viirya) +- build: Add CI for TPCDS queries [#99](https://github.com/apache/datafusion-comet/pull/99) (viirya) +- build: Add tpcds-sf-1 to license header excluded list [#108](https://github.com/apache/datafusion-comet/pull/108) (viirya) +- build: Show time duration for scala test [#116](https://github.com/apache/datafusion-comet/pull/116) (advancedxy) +- test: Move MacOS (x86) pipelines to post-commit [#122](https://github.com/apache/datafusion-comet/pull/122) (sunchao) +- build: Upgrade DF to 36.0.0 and arrow-rs 50.0.0 [#66](https://github.com/apache/datafusion-comet/pull/66) (comphead) +- test: Reduce end-to-end test time [#109](https://github.com/apache/datafusion-comet/pull/109) (sunchao) +- 
build: Separate and speedup TPC-DS benchmark [#130](https://github.com/apache/datafusion-comet/pull/130) (advancedxy) +- build: Re-enable TPCDS queries q34 and q64 in `CometTPCDSQuerySuite` [#133](https://github.com/apache/datafusion-comet/pull/133) (viirya) +- build: Refine names in benchmark.yml [#132](https://github.com/apache/datafusion-comet/pull/132) (advancedxy) +- build: Make the build system work out of box [#136](https://github.com/apache/datafusion-comet/pull/136) (advancedxy) +- minor: Update README.md with system diagram [#148](https://github.com/apache/datafusion-comet/pull/148) (alamb) +- test: Add golden files for test [#150](https://github.com/apache/datafusion-comet/pull/150) (snmvaughan) +- build: Add checker for PR title [#151](https://github.com/apache/datafusion-comet/pull/151) (sunchao) +- build: Support CI pipelines for Spark 3.2, 3.3 and 3.4 [#153](https://github.com/apache/datafusion-comet/pull/153) (advancedxy) +- minor: Only trigger PR title checker on pull requests [#154](https://github.com/apache/datafusion-comet/pull/154) (sunchao) +- chore: Fix warnings in both compiler and test environments [#164](https://github.com/apache/datafusion-comet/pull/164) (advancedxy) +- build: Upload test reports and coverage [#163](https://github.com/apache/datafusion-comet/pull/163) (advancedxy) +- minor: Remove unnecessary logic [#169](https://github.com/apache/datafusion-comet/pull/169) (sunchao) +- minor: Make `QueryPlanSerde` warning log less confusing [#181](https://github.com/apache/datafusion-comet/pull/181) (viirya) +- refactor: Skipping slicing on shuffle arrays in shuffle reader [#189](https://github.com/apache/datafusion-comet/pull/189) (viirya) +- build: Run Spark SQL tests for 3.4 [#166](https://github.com/apache/datafusion-comet/pull/166) (sunchao) +- build: Enforce scalafix check in CI [#203](https://github.com/apache/datafusion-comet/pull/203) (advancedxy) +- test: Follow up on Spark 3.4 diff 
[#209](https://github.com/apache/datafusion-comet/pull/209) (sunchao) +- build: Avoid confusion by using profile with clean [#215](https://github.com/apache/datafusion-comet/pull/215) (snmvaughan) +- test: Add TPC-H test results [#218](https://github.com/apache/datafusion-comet/pull/218) (viirya) +- build: Add CI for TPC-H queries [#220](https://github.com/apache/datafusion-comet/pull/220) (viirya) +- test: Enable Comet shuffle in Spark SQL tests [#210](https://github.com/apache/datafusion-comet/pull/210) (sunchao) +- test: Disable spark ui in unit test by default [#235](https://github.com/apache/datafusion-comet/pull/235) (beryllw) +- chore: Replace deprecated temporal methods [#229](https://github.com/apache/datafusion-comet/pull/229) (snmvaughan) +- build: Use specified branch of arrow-rs with workaround to invalid offset buffers from Java Arrow [#239](https://github.com/apache/datafusion-comet/pull/239) (viirya) +- test: Enable string-to-bool cast test [#251](https://github.com/apache/datafusion-comet/pull/251) (andygrove) +- test: Restore tests in CometTPCDSQuerySuite [#252](https://github.com/apache/datafusion-comet/pull/252) (viirya) +- test: Enable all remaining TPCDS queries [#254](https://github.com/apache/datafusion-comet/pull/254) (viirya) +- test: Enable all remaining TPCH queries [#257](https://github.com/apache/datafusion-comet/pull/257) (viirya) +- chore: Remove some calls to unwrap when calling create_expr in planner.rs [#269](https://github.com/apache/datafusion-comet/pull/269) (andygrove) +- chore: Fix typo in info message [#279](https://github.com/apache/datafusion-comet/pull/279) (andygrove) +- chore: Fix NPE when running CometTPCHQueriesList directly [#285](https://github.com/apache/datafusion-comet/pull/285) (advancedxy) +- chore: Update Comet repo description [#291](https://github.com/apache/datafusion-comet/pull/291) (viirya) +- Chore: Cleanup how datafusion session config is created 
[#289](https://github.com/apache/datafusion-comet/pull/289) (psvri) +- build: Update asf.yaml to use `@datafusion.apache.org` [#294](https://github.com/apache/datafusion-comet/pull/294) (sunchao) +- chore: Remove unused functions [#301](https://github.com/apache/datafusion-comet/pull/301) (kazuyukitanimura) +- chore: Ignore unused variables [#306](https://github.com/apache/datafusion-comet/pull/306) (snmvaughan) +- chore: Update documentation publishing domain and path [#310](https://github.com/apache/datafusion-comet/pull/310) (andygrove) +- chore: Add documentation publishing infrastructure [#314](https://github.com/apache/datafusion-comet/pull/314) (andygrove) +- build: Move shim directories [#318](https://github.com/apache/datafusion-comet/pull/318) (kazuyukitanimura) +- test: Suppress decimal random number tests for 3.2 and 3.3 [#319](https://github.com/apache/datafusion-comet/pull/319) (kazuyukitanimura) +- chore: Add allocation source to StreamReader [#332](https://github.com/apache/datafusion-comet/pull/332) (viirya) +- chore: Add more cast tests and improve test framework [#351](https://github.com/apache/datafusion-comet/pull/351) (andygrove) +- chore: Implement remaining CAST tests [#356](https://github.com/apache/datafusion-comet/pull/356) (andygrove) +- build: Add Spark SQL test pipeline with ANSI mode enabled [#321](https://github.com/apache/datafusion-comet/pull/321) (parthchandra) +- chore: Store EXTENSION_INFO as Set[String] instead of newline-delimited String [#386](https://github.com/apache/datafusion-comet/pull/386) (andygrove) +- build: Add scala-version to matrix [#396](https://github.com/apache/datafusion-comet/pull/396) (snmvaughan) +- chore: Add criterion benchmarks for casting between integer types [#401](https://github.com/apache/datafusion-comet/pull/401) (andygrove) +- chore: Make COMET_EXEC_BROADCAST_FORCE_ENABLED internal config [#413](https://github.com/apache/datafusion-comet/pull/413) (viirya) +- chore: Rename some columnar shuffle 
configs for code consistently [#418](https://github.com/apache/datafusion-comet/pull/418) (leoluan2009) +- chore: Remove an unused config [#430](https://github.com/apache/datafusion-comet/pull/430) (andygrove) +- tests: Move random data generation methods from CometCastSuite to new DataGenerator class [#426](https://github.com/apache/datafusion-comet/pull/426) (andygrove) +- test: Fix explain with extended info comet test [#436](https://github.com/apache/datafusion-comet/pull/436) (kazuyukitanimura) +- chore: Add cargo bench for shuffle writer [#438](https://github.com/apache/datafusion-comet/pull/438) (andygrove) +- chore: improve fallback message when comet native shuffle is not enabled [#445](https://github.com/apache/datafusion-comet/pull/445) (andygrove) +- Coverage: Add a manual test to show what Spark built in expression the DF can support directly [#331](https://github.com/apache/datafusion-comet/pull/331) (comphead) +- build: Add spark-4.0 profile and shims [#407](https://github.com/apache/datafusion-comet/pull/407) (kazuyukitanimura) +- build: bump spark version to 3.4.3 [#292](https://github.com/apache/datafusion-comet/pull/292) (huaxingao) +- chore: Removing copying data from dictionary values into CometDictionary [#490](https://github.com/apache/datafusion-comet/pull/490) (viirya) +- chore: Update README to highlight Comet benefits [#497](https://github.com/apache/datafusion-comet/pull/497) (andygrove) +- test: fix ClassNotFoundException for Hive tests [#499](https://github.com/apache/datafusion-comet/pull/499) (kazuyukitanimura) +- build: Enable comet tests with spark-4.0 profile [#493](https://github.com/apache/datafusion-comet/pull/493) (kazuyukitanimura) +- chore: Switch to stable Rust [#505](https://github.com/apache/datafusion-comet/pull/505) (andygrove) +- Minor: Generate the supported Spark builtin expression list into MD file [#455](https://github.com/apache/datafusion-comet/pull/455) (comphead) +- chore: Simplify code in CometExecIterator and
avoid some small overhead [#522](https://github.com/apache/datafusion-comet/pull/522) (andygrove) +- chore: Upgrade spark to 4.0.0-preview1 [#526](https://github.com/apache/datafusion-comet/pull/526) (advancedxy) +- chore: Add UnboundColumn to carry datatype for unbound reference [#518](https://github.com/apache/datafusion-comet/pull/518) (viirya) +- chore: Remove 3.4.2.diff [#528](https://github.com/apache/datafusion-comet/pull/528) (kazuyukitanimura) +- build: Switch back to official DataFusion repo and arrow-rs after Arrow Java 16 is released [#403](https://github.com/apache/datafusion-comet/pull/403) (viirya) +- chore: Add CometEvalMode enum to replace string literals [#539](https://github.com/apache/datafusion-comet/pull/539) (andygrove) +- chore: Create initial release process scripts for official ASF source release [#429](https://github.com/apache/datafusion-comet/pull/429) (andygrove) +- build: Use DataFusion 39.0.0 release [#550](https://github.com/apache/datafusion-comet/pull/550) (viirya) +- chore: disable xxhash64 by default [#548](https://github.com/apache/datafusion-comet/pull/548) (andygrove) +- chore: Remove unsafe use of from_raw_parts in Parquet decoder [#549](https://github.com/apache/datafusion-comet/pull/549) (andygrove) +- test: Add tests for Scalar and Interval values for UnaryMinus [#538](https://github.com/apache/datafusion-comet/pull/538) (vaibhawvipul) +- chore: Add changelog generator [#545](https://github.com/apache/datafusion-comet/pull/545) (andygrove) +- chore: Remove unused hash_utils.rs [#561](https://github.com/apache/datafusion-comet/pull/561) (andygrove) +- chore: Use in_list func directly [#559](https://github.com/apache/datafusion-comet/pull/559) (advancedxy) +- chore: Fix most of the scala/java build warnings [#562](https://github.com/apache/datafusion-comet/pull/562) (andygrove) +- chore: Upgrade to Rust 1.78 and fix UB issues in unsafe code [#546](https://github.com/apache/datafusion-comet/pull/546) (andygrove) +- chore:
Remove `spark.comet.xxhash64.enabled` from the config document [#586](https://github.com/apache/datafusion-comet/pull/586) (viirya) +- build: Drop Spark 3.2 support [#581](https://github.com/apache/datafusion-comet/pull/581) (huaxingao) +- test: Enable Spark 4.0 tests [#537](https://github.com/apache/datafusion-comet/pull/537) (kazuyukitanimura) +- refactor: Remove method get_global_jclass [#580](https://github.com/apache/datafusion-comet/pull/580) (eejbyfeldt) +- chore: Move some utility methods to submodules of scalar_funcs [#590](https://github.com/apache/datafusion-comet/pull/590) (advancedxy) +- chore: Upgrade to Rust 1.79 [#570](https://github.com/apache/datafusion-comet/pull/570) (andygrove) +- chore: Remove some calls to `unwrap` [#598](https://github.com/apache/datafusion-comet/pull/598) (andygrove) +- chore: Improve JNI safety [#600](https://github.com/apache/datafusion-comet/pull/600) (andygrove) +- chore: remove some unwraps from shuffle module [#601](https://github.com/apache/datafusion-comet/pull/601) (andygrove) +- chore: Use proper constructor of IndexShuffleBlockResolver [#610](https://github.com/apache/datafusion-comet/pull/610) (viirya) +- chore: Update benchmark results [#614](https://github.com/apache/datafusion-comet/pull/614) (andygrove) +- build: Upgrade to 2.13.14 for scala-2.13 profile [#626](https://github.com/apache/datafusion-comet/pull/626) (viirya) +- chore: Rename shuffle write metric [#624](https://github.com/apache/datafusion-comet/pull/624) (andygrove) +- minor: replace .downcast_ref::().is_some() with .is::() [#635](https://github.com/apache/datafusion-comet/pull/635) (andygrove) +- test: Add CometTPCDSQueryTestSuite [#628](https://github.com/apache/datafusion-comet/pull/628) (viirya) +- chore: Convert Rust project into a workspace [#637](https://github.com/apache/datafusion-comet/pull/637) (andygrove) +- chore: Add Miri workflow [#636](https://github.com/apache/datafusion-comet/pull/636) (andygrove) +- test: Run optimized 
version of q72 derived from TPC-DS [#652](https://github.com/apache/datafusion-comet/pull/652) (viirya) +- chore: Refactoring of CometError/SparkError [#655](https://github.com/apache/datafusion-comet/pull/655) (andygrove) +- chore: Move `cast` to `spark-expr` crate [#654](https://github.com/apache/datafusion-comet/pull/654) (andygrove) +- chore: Remove utils crate and move utils into spark-expr crate [#658](https://github.com/apache/datafusion-comet/pull/658) (andygrove) +- chore: Move temporal kernels and expressions to spark-expr crate [#660](https://github.com/apache/datafusion-comet/pull/660) (andygrove) +- chore: Move protobuf files to separate crate [#661](https://github.com/apache/datafusion-comet/pull/661) (andygrove) +- Use IfExpr to check when input to log2 is <=0 and return null [#506](https://github.com/apache/datafusion-comet/pull/506) (PedroMDuarte) +- chore: Change suffix on some expressions from Exec to Expr [#673](https://github.com/apache/datafusion-comet/pull/673) (andygrove) +- chore: Fix some regressions with Spark 3.5.1 [#674](https://github.com/apache/datafusion-comet/pull/674) (andygrove) +- chore: Improve fuzz testing coverage [#668](https://github.com/apache/datafusion-comet/pull/668) (andygrove) +- Create Comet docker file [#675](https://github.com/apache/datafusion-comet/pull/675) (comphead) +- chore: Add microbenchmarks [#671](https://github.com/apache/datafusion-comet/pull/671) (andygrove) +- build: Exclude protobuf generated codes from apache-rat check [#683](https://github.com/apache/datafusion-comet/pull/683) (viirya) +- chore: Disable abs and signum because they return incorrect results [#695](https://github.com/apache/datafusion-comet/pull/695) (andygrove) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor.
+ +``` + 100 Liang-Chi Hsieh + 82 Andy Grove + 28 advancedxy + 27 Chao Sun + 14 Huaxin Gao + 11 KAZUYUKI TANIMURA + 9 Vipul Vaibhaw + 8 Parth Chandra + 7 Emil Ejbyfeldt + 7 Steve Vaughan + 7 comphead + 4 Oleks V + 4 Pablo Langa + 4 Trent Hauck + 2 Edmondo Porcu + 2 Vrishabh + 2 Xin Hao + 2 Xuedong Luan + 1 Andrew Lamb + 1 Brian Vaughan + 1 Cancai Cai + 1 Eren Avsarogullari + 1 Holden Karau + 1 JC + 1 Junbo wang + 1 Junfan Zhang + 1 Pedro M Duarte + 1 Prashant K. Sharma + 1 RickestCode + 1 Rohit Rastogi + 1 Roman Zeyde + 1 Semyon + 1 Son + 1 Sujith Jay Nair + 1 Zhen Wang + 1 ceppelli + 1 dependabot[bot] + 1 thexia + 1 vidyasankarv + 1 wankun + 1 గణేష్ +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/release/README.md b/dev/release/README.md index 1abb359ef..fc8e7ca44 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -51,6 +51,10 @@ git push apache branch-0.1 Create and merge a PR against the release branch to update the Maven version from `0.1.0-SNAPSHOT` to `0.1.0` +### Update Version in main + +Create a PR against the main branch to update the Rust crate version to `0.2.0` and the Maven version to `0.2.0-SNAPSHOT`. + ### Generate the Change Log Generate a change log to cover changes between the previous release and the release branch HEAD by running @@ -88,10 +92,6 @@ git tag 0.1.0-rc1 git push apache 0.1.0-rc1 ``` -### Update Version in main - -Create a PR against the main branch to update the Rust crate version to `0.2.0` and the Maven version to `0.2.0-SNAPHOT`. - ## Publishing the Release Candidate This part of the process can mostly only be performed by a PMC member. 
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index da09e3d1b..df150835e 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -107,7 +107,7 @@ setup_tempdir() { test_source_distribution() { set -e - pushd core + pushd native RUSTFLAGS="-Ctarget-cpu=native" cargo build --release popd # test with the latest supported version of Spark diff --git a/dev/release/verifying-release-candidates.md b/dev/release/verifying-release-candidates.md index 85cdf010e..ca93ad504 100644 --- a/dev/release/verifying-release-candidates.md +++ b/dev/release/verifying-release-candidates.md @@ -28,6 +28,12 @@ again is somewhat redundant. ./dev/release/verify-release-candidate.sh 0.1.0 1 ``` +The following command can be used to build a release for testing. + +```shell +make release-nogit +``` + We hope that users will verify the release beyond running this script by testing the release candidate with their existing Spark jobs and report any functional issues or performance regressions. diff --git a/docs/source/user-guide/installation.md b/docs/source/user-guide/installation.md index bdf6c0e00..7d140a7ac 100644 --- a/docs/source/user-guide/installation.md +++ b/docs/source/user-guide/installation.md @@ -32,11 +32,18 @@ Make sure the following requirements are met and software installed on your mach - JDK 8 and up - GLIBC 2.17 (Centos 7) and up -## Using a Published Release +## Using a Published Binary Release -There are no public releases available yet, so it is necessary to build from source as described in the next section. +There are no published binary releases yet. 
-## Building From Source +## Using a Published Source Release + +Official source releases can be downloaded from https://dist.apache.org/repos/dist/release/datafusion/ + +Building from a source release is mostly the same as building directly from the GitHub repository but requires the +use of the command `make release-nogit` instead of `make release`. + +## Building from the GitHub repository Clone the repository: diff --git a/fuzz-testing/pom.xml b/fuzz-testing/pom.xml index f69d959f9..a413ae2e1 100644 --- a/fuzz-testing/pom.xml +++ b/fuzz-testing/pom.xml @@ -25,7 +25,7 @@ under the License. org.apache.comet comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.1.0-SNAPSHOT + 0.2.0-SNAPSHOT ../pom.xml diff --git a/native/Cargo.lock b/native/Cargo.lock index 339d9ad84..af7951b87 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -849,7 +849,7 @@ dependencies = [ [[package]] name = "datafusion-comet" -version = "0.1.0" +version = "0.2.0" dependencies = [ "ahash", "arrow", @@ -902,7 +902,7 @@ dependencies = [ [[package]] name = "datafusion-comet-proto" -version = "0.1.0" +version = "0.2.0" dependencies = [ "prost 0.12.6", "prost-build", @@ -910,7 +910,7 @@ dependencies = [ [[package]] name = "datafusion-comet-spark-expr" -version = "0.1.0" +version = "0.2.0" dependencies = [ "arrow", "arrow-array", diff --git a/native/Cargo.toml b/native/Cargo.toml index c52d906bc..f62fd3219 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -20,7 +20,7 @@ members = ["core", "spark-expr", "proto"] resolver = "2" [workspace.package] -version = "0.1.0" +version = "0.2.0" homepage = "https://datafusion.apache.org/comet" repository = "https://github.com/apache/datafusion-comet" authors = ["Apache DataFusion "] @@ -46,8 +46,8 @@ datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "40. 
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = false } datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = false } datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = false } -datafusion-comet-spark-expr = { path = "spark-expr", version = "0.1.0" } -datafusion-comet-proto = { path = "proto", version = "0.1.0" } +datafusion-comet-spark-expr = { path = "spark-expr", version = "0.2.0" } +datafusion-comet-proto = { path = "proto", version = "0.2.0" } chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } num = "0.4" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 158c26319..3046c1d8f 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-comet" -version = "0.1.0" +version = { workspace = true } homepage = "https://datafusion.apache.org/comet" repository = "https://github.com/apache/datafusion-comet" authors = ["Apache DataFusion "] diff --git a/native/proto/Cargo.toml b/native/proto/Cargo.toml index 29aba6396..6c217fac2 100644 --- a/native/proto/Cargo.toml +++ b/native/proto/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-comet-proto" -version = "0.1.0" +version = { workspace = true } homepage = "https://datafusion.apache.org/comet" repository = "https://github.com/apache/datafusion-comet" authors = ["Apache DataFusion "] diff --git a/pom.xml b/pom.xml index e82a09996..98a5624d3 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ under the License. 
4.0.0 org.apache.comet comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.1.0-SNAPSHOT + 0.2.0-SNAPSHOT pom Comet Project Parent POM diff --git a/spark-integration/pom.xml b/spark-integration/pom.xml index 6af59ac69..20f4ee00f 100644 --- a/spark-integration/pom.xml +++ b/spark-integration/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.comet comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.1.0-SNAPSHOT + 0.2.0-SNAPSHOT ../pom.xml diff --git a/spark/pom.xml b/spark/pom.xml index 70ea32187..e3ed6b2e4 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.comet comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.1.0-SNAPSHOT + 0.2.0-SNAPSHOT ../pom.xml