Merge branch 'refs/heads/4.0.0-develop' into rel/4.0.0_arenadata1
Asmoday committed Jul 30, 2024
2 parents 480fb8c + 30cedca commit 63be69d
Showing 26 changed files with 604 additions and 322 deletions.
5 changes: 5 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -921,6 +921,9 @@ public static enum ConfVars {
METASTORE_URIS("hive.metastore.uris", "",
"Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),

@Deprecated
METASTOREURIS("hive.metastore.uris", "",
"Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),
/**
* @deprecated Use MetastoreConf.THRIFT_URI_SELECTION
*/
@@ -3914,6 +3917,8 @@ public static enum ConfVars {
"The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery."),
HIVE_SERVER2_ZOOKEEPER_PUBLISH_CONFIGS("hive.server2.zookeeper.publish.configs", true,
"Whether we should publish HiveServer2's configs to ZooKeeper."),
HIVE_SERVER2_LEADER_ZOOKEEPER_NAMESPACE("hive.server2.leader.zookeeper.namespace", "hiveserver2-leader",
"Zookeeper znode for HiveServer2 leader selection."),
HIVE_SERVER2_TRUSTED_PROXY_TRUSTHEADER("hive.server2.proxy.trustheader", "", "This config " +
"indicates whether the connection is authenticated before the requests lands on HiveServer2, So that we can" +
"avoid the authentication is again in HS2. Default value is empty, if it's value is set to some header say " +
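A minimal, hypothetical sketch of reading and overriding the new leader-election namespace through HiveConf (constant names are those added in this hunk; the override value is illustrative):

import org.apache.hadoop.hive.conf.HiveConf;

public class LeaderNamespaceSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Default is "hiveserver2-leader"; the override here is purely for illustration.
    conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_LEADER_ZOOKEEPER_NAMESPACE, "hs2-leader-test");
    System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LEADER_ZOOKEEPER_NAMESPACE));
    // Note: METASTOREURIS above is re-added as a @Deprecated alias of METASTORE_URIS;
    // both resolve to the same key, hive.metastore.uris.
  }
}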
@@ -0,0 +1 @@
org.apache.hive.org.apache.calcite.jdbc.Driver
@@ -142,7 +142,9 @@ private Map<String, Integer> buildFieldPositionMap(StructType schema) {
private static Function<Object, Object> converter(Type type) {
switch (type.typeId()) {
case TIMESTAMP:
return timestamp -> DateTimeUtil.timestamptzFromMicros((Long) timestamp);
return timestamp -> ((Types.TimestampType) type).shouldAdjustToUTC() ?
DateTimeUtil.timestamptzFromMicros((Long) timestamp) :
DateTimeUtil.timestampFromMicros((Long) timestamp);
case DATE:
return date -> DateTimeUtil.dateFromDays((Integer) date);
case STRUCT:
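For context, the switch above now distinguishes Iceberg's timestamptz (adjust-to-UTC) type from its plain timestamp type. A small standalone sketch of the two conversion paths, using the Iceberg utilities named in the hunk (the epoch-micros value and printed results are illustrative):

import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;

public class TimestampConversionSketch {
  public static void main(String[] args) {
    long micros = 1689811200000000L; // 2023-07-20 00:00:00 UTC in microseconds since epoch
    System.out.println(convert(Types.TimestampType.withZone(), micros));    // OffsetDateTime, e.g. 2023-07-20T00:00Z
    System.out.println(convert(Types.TimestampType.withoutZone(), micros)); // LocalDateTime, e.g. 2023-07-20T00:00
  }

  static Object convert(Types.TimestampType type, long micros) {
    // Mirrors the TIMESTAMP branch above: timestamptz keeps the UTC offset,
    // plain timestamp becomes a local (zone-less) value.
    return type.shouldAdjustToUTC()
        ? DateTimeUtil.timestamptzFromMicros(micros)
        : DateTimeUtil.timestampFromMicros(micros);
  }
}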
@@ -0,0 +1,18 @@
-- SORT_QUERY_RESULTS
-- MASK_TIMESTAMP
create table ice_ts_4 (id int, ts timestamp ) stored by iceberg stored as parquet tblproperties ('format-version'='2');
insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp)), (2, cast('2023-07-20 00:00:00' as timestamp));
select * from ice_ts_4;
delete from ice_ts_4 where id = 2;
select * from ice_ts_4;
select readable_metrics from default.ice_ts_4.FILES;
select readable_metrics from default.ice_ts_4.ALL_FILES;
select readable_metrics from default.ice_ts_4.DATA_FILES;
select readable_metrics from default.ice_ts_4.ALL_DATA_FILES;
select readable_metrics from default.ice_ts_4.DELETE_FILES;

-- Test partitions table
CREATE EXTERNAL TABLE ice_part (`col1` int, `decimalA` decimal(5,2), `decimalC` decimal(5,2)) PARTITIONED BY SPEC
(decimalC) stored by iceberg tblproperties('format-version'='2');
insert into ice_part values(1, 122.91, 102.21), (1, 12.32, 200.12);
select last_updated_at from default.ice_part.PARTITIONS;
@@ -0,0 +1,118 @@
PREHOOK: query: create table ice_ts_4 (id int, ts timestamp ) stored by iceberg stored as parquet tblproperties ('format-version'='2')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice_ts_4
POSTHOOK: query: create table ice_ts_4 (id int, ts timestamp ) stored by iceberg stored as parquet tblproperties ('format-version'='2')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_ts_4
PREHOOK: query: insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp)), (2, cast('2023-07-20 00:00:00' as timestamp))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_ts_4
POSTHOOK: query: insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp)), (2, cast('2023-07-20 00:00:00' as timestamp))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_ts_4
PREHOOK: query: select * from ice_ts_4
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice_ts_4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 2023-07-20 00:00:00
2 2023-07-20 00:00:00
PREHOOK: query: delete from ice_ts_4 where id = 2
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: default@ice_ts_4
POSTHOOK: query: delete from ice_ts_4 where id = 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: default@ice_ts_4
PREHOOK: query: select * from ice_ts_4
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice_ts_4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 2023-07-20 00:00:00
PREHOOK: query: select readable_metrics from default.ice_ts_4.FILES
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select readable_metrics from default.ice_ts_4.FILES
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"id":{"column_size":46,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":2},"ts":{"column_size":76,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-07-20 00:00:00","upper_bound":"2023-07-20 00:00:00"}}
{"id":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ts":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
PREHOOK: query: select readable_metrics from default.ice_ts_4.ALL_FILES
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select readable_metrics from default.ice_ts_4.ALL_FILES
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"id":{"column_size":46,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":2},"ts":{"column_size":76,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-07-20 00:00:00","upper_bound":"2023-07-20 00:00:00"}}
{"id":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ts":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
PREHOOK: query: select readable_metrics from default.ice_ts_4.DATA_FILES
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select readable_metrics from default.ice_ts_4.DATA_FILES
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"id":{"column_size":46,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":2},"ts":{"column_size":76,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-07-20 00:00:00","upper_bound":"2023-07-20 00:00:00"}}
PREHOOK: query: select readable_metrics from default.ice_ts_4.ALL_DATA_FILES
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select readable_metrics from default.ice_ts_4.ALL_DATA_FILES
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"id":{"column_size":46,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":2},"ts":{"column_size":76,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-07-20 00:00:00","upper_bound":"2023-07-20 00:00:00"}}
PREHOOK: query: select readable_metrics from default.ice_ts_4.DELETE_FILES
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select readable_metrics from default.ice_ts_4.DELETE_FILES
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
{"id":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ts":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
PREHOOK: query: CREATE EXTERNAL TABLE ice_part (`col1` int, `decimalA` decimal(5,2), `decimalC` decimal(5,2)) PARTITIONED BY SPEC
(decimalC) stored by iceberg tblproperties('format-version'='2')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice_part
POSTHOOK: query: CREATE EXTERNAL TABLE ice_part (`col1` int, `decimalA` decimal(5,2), `decimalC` decimal(5,2)) PARTITIONED BY SPEC
(decimalC) stored by iceberg tblproperties('format-version'='2')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_part
PREHOOK: query: insert into ice_part values(1, 122.91, 102.21), (1, 12.32, 200.12)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_part
POSTHOOK: query: insert into ice_part values(1, 122.91, 102.21), (1, 12.32, 200.12)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_part
PREHOOK: query: select last_updated_at from default.ice_part.PARTITIONS
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_part
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select last_updated_at from default.ice_part.PARTITIONS
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_part
POSTHOOK: Output: hdfs://### HDFS PATH ###
###MaskedTimeStamp###
###MaskedTimeStamp###
@@ -20,18 +20,18 @@
import com.google.common.collect.Lists;
import java.util.concurrent.TimeUnit;

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.AbstractRelNode;
import org.apache.calcite.rel.BiRel;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.SingleRel;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelRecordType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.tools.RelBuilder;
import org.apache.hive.org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.hive.org.apache.calcite.plan.RelOptCluster;
import org.apache.hive.org.apache.calcite.plan.RelOptPlanner;
import org.apache.hive.org.apache.calcite.plan.RelTraitSet;
import org.apache.hive.org.apache.calcite.rel.AbstractRelNode;
import org.apache.hive.org.apache.calcite.rel.BiRel;
import org.apache.hive.org.apache.calcite.rel.RelNode;
import org.apache.hive.org.apache.calcite.rel.SingleRel;
import org.apache.hive.org.apache.calcite.rel.type.RelDataType;
import org.apache.hive.org.apache.calcite.rel.type.RelRecordType;
import org.apache.hive.org.apache.calcite.rex.RexBuilder;
import org.apache.hive.org.apache.calcite.tools.RelBuilder;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
@@ -73,7 +73,7 @@ public class FieldTrimmerBench {
RelOptCluster relOptCluster;
RelBuilder relBuilder;
RelNode root;
org.apache.calcite.sql2rel.RelFieldTrimmer cft;
org.apache.hive.org.apache.calcite.sql2rel.RelFieldTrimmer cft;
HiveRelFieldTrimmer ft;
HiveRelFieldTrimmer hft;

@@ -105,7 +105,7 @@ public void initTrial() {
@Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS)
public void baseRelFieldTrimmer() {
// We initialize the field trimmer for every execution of the benchmark
cft = new org.apache.calcite.sql2rel.RelFieldTrimmer(null, relBuilder);
cft = new org.apache.hive.org.apache.calcite.sql2rel.RelFieldTrimmer(null, relBuilder);
cft.trim(root);
cft = null;
}
@@ -60,6 +60,10 @@ public class QOutProcessor {
private static final PatternReplacementPair MASK_DATA_SIZE = new PatternReplacementPair(
Pattern.compile(" Data size: [1-9][0-9]*"),
" Data size: ###Masked###");
private static final PatternReplacementPair MASK_TIMESTAMP = new PatternReplacementPair(
Pattern.compile(
"[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3} [a-zA-Z/]*"),
" ###MaskedTimeStamp### ");
private static final PatternReplacementPair MASK_LINEAGE = new PatternReplacementPair(
Pattern.compile("POSTHOOK: Lineage: .*"),
"POSTHOOK: Lineage: ###Masked###");
@@ -143,7 +147,7 @@ public String get() {
};

private enum Mask {
STATS("-- MASK_STATS"), DATASIZE("-- MASK_DATA_SIZE"), LINEAGE("-- MASK_LINEAGE");
STATS("-- MASK_STATS"), DATASIZE("-- MASK_DATA_SIZE"), LINEAGE("-- MASK_LINEAGE"), TIMESTAMP("-- MASK_TIMESTAMP");
private Pattern pattern;

Mask(String pattern) {
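A standalone sketch of what the new MASK_TIMESTAMP replacement does, using plain java.util.regex with the pattern string from the hunk above (the sample line is made up):

import java.util.regex.Pattern;

public class TimestampMaskSketch {
  public static void main(String[] args) {
    // Same pattern string the diff adds; note the '.' before the fractional seconds is an unescaped any-char.
    Pattern masked = Pattern.compile(
        "[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3} [a-zA-Z/]*");
    String line = "2024-07-30 12:34:56.789 Europe/Moscow";
    // Prints " ###MaskedTimeStamp### ", matching the masked q.out output shown above.
    System.out.println(masked.matcher(line).replaceAll(" ###MaskedTimeStamp### "));
  }
}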
4 changes: 2 additions & 2 deletions pom.xml
@@ -142,7 +142,7 @@
<hadoop.version>3.3.6</hadoop.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
<hamcrest.version>1.3</hamcrest.version>
<hbase.version>2.5.8-hadoop3</hbase.version>
<hbase.version>2.5.10-hadoop3</hbase.version>
<hppc.version>0.7.2</hppc.version>
<!-- required for logging test to avoid including hbase which pulls disruptor transitively -->
<disruptor.version>3.3.7</disruptor.version>
@@ -190,7 +190,7 @@
<!-- used by druid storage handler -->
<pac4j-saml.version>4.5.5</pac4j-saml.version>
<paranamer.version>2.8</paranamer.version>
<parquet.version>1.14.1</parquet.version>
<parquet.version>1.13.1</parquet.version>
<pig.version>0.16.0</pig.version>
<plexus.version>1.5.6</plexus.version>
<protobuf.version>3.24.4</protobuf.version>
5 changes: 5 additions & 0 deletions ql/pom.xml
@@ -1107,6 +1107,7 @@
<include>com.fasterxml.jackson.core:jackson-databind</include>
<include>com.fasterxml.jackson.dataformat:jackson-dataformat-yaml</include>
<include>com.google.guava:guava</include>
<include>com.google.guava:failureaccess</include>
<include>net.sf.opencsv:opencsv</include>
<include>org.apache.hive:hive-storage-api</include>
<include>org.threeten:threeten-extra</include>
@@ -1179,6 +1180,10 @@
<pattern>org.apache.datasketches</pattern>
<shadedPattern>org.apache.hive.org.apache.datasketches</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.calcite</pattern>
<shadedPattern>org.apache.hive.org.apache.calcite</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
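The added relocation rewrites Calcite packages into the shaded hive-exec namespace, which is also why the one-line service file earlier in this commit registers org.apache.hive.org.apache.calcite.jdbc.Driver. A hypothetical check, assuming the shaded hive-exec jar is on the classpath:

public class ShadedCalciteCheck {
  public static void main(String[] args) throws ClassNotFoundException {
    // The Calcite JDBC driver is only reachable under its relocated (shaded) name.
    Class<?> driver = Class.forName("org.apache.hive.org.apache.calcite.jdbc.Driver");
    System.out.println("Loaded relocated driver: " + driver.getName());
  }
}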
(Diffs for the remaining changed files were not loaded on this page.)
