Skip to content

Commit

Permalink
HIVE-28773: CBO fails when a materialized view is dropped but its pre…
Browse files Browse the repository at this point in the history
…-compiled plan remains in the registry.
  • Loading branch information
kasakrisz committed Mar 3, 2025
1 parent 341f597 commit 3ce1cc6
Show file tree
Hide file tree
Showing 8 changed files with 517 additions and 219 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.txn.compactor;

import org.apache.hadoop.hive.common.MaterializationSnapshot;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
import org.apache.hadoop.hive.ql.metadata.MaterializedViewMetadata;
import org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import static java.util.Arrays.asList;
import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently;

public class TestHiveMaterializedViewRegistry extends CompactorOnTezTest {

private static final String DB = "default";
private static final String TABLE1 = "t1";
private static final String MV1 = "mat1";

@Override
public void setup() throws Exception {
super.setup();

executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver);
executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver);

executeStatementOnDriver("create table " + TABLE1 + "(a int, b string, c float) stored as orc TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("insert into " + TABLE1 + "(a,b, c) values (1, 'one', 1.1), (2, 'two', 2.2), (NULL, NULL, NULL)", driver);
}

@Override
public void tearDown() {
executeStatementOnDriverSilently("drop materialized view " + MV1, driver);
executeStatementOnDriverSilently("drop table " + TABLE1 , driver);

super.tearDown();
}

@Test
public void testRefreshAddsNewMV() throws Exception {
CreateMaterializedViewDesc createMaterializedViewDesc = createMaterializedViewDesc();
Table mvTable = createMaterializedViewDesc.toTable(conf);
Hive.get().createTable(mvTable);

HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);

Assert.assertEquals(DB, materialization.qualifiedTableName.get(0));
Assert.assertEquals(MV1, materialization.qualifiedTableName.get(1));
}

@Test
public void testRefreshDoesNotAddMVDisabledForRewrite() throws Exception {
CreateMaterializedViewDesc createMaterializedViewDesc = createMaterializedViewDesc();
createMaterializedViewDesc.setRewriteEnabled(false);
Table mvTable = createMaterializedViewDesc.toTable(conf);
Hive.get().createTable(mvTable);

HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);
Assert.assertNull(materialization);
}

@Test
public void testRefreshUpdatesExistingMV() throws Exception {
// init the registry
HiveMaterializedViewsRegistry.get().refresh(Hive.get());

executeStatementOnDriver("create materialized view " + MV1 + " as " +
"select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver);

// replace the MV
Hive.get().dropTable(DB, MV1);

CreateMaterializedViewDesc createMaterializedViewDesc = createMaterializedViewDesc();
Table mvTable = createMaterializedViewDesc.toTable(conf);
mvTable.setMaterializedViewMetadata(new MaterializedViewMetadata(
"hive", DB, MV1, new HashSet<>(), new MaterializationSnapshot("anything")));
Hive.get().createTable(mvTable);

// test refreshing the registry
HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);

Assert.assertEquals(DB, materialization.qualifiedTableName.get(0));
Assert.assertEquals(MV1, materialization.qualifiedTableName.get(1));
Table existingMVTable = HiveMaterializedViewUtils.extractTable(materialization);
Assert.assertEquals(mvTable.getTTable().getCreateTime(), existingMVTable.getCreateTime());
}

@Test
public void testRefreshRemovesMVDisabledForRewrite() throws Exception {
// init the registry
HiveMaterializedViewsRegistry.get().refresh(Hive.get());

executeStatementOnDriver("create materialized view " + MV1 + " as " +
"select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver);

Table mvTable = Hive.get().getTable(DB, MV1);
mvTable.setRewriteEnabled(false);

EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
Hive.get().alterTable(mvTable, false, environmentContext, true);

HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);
Assert.assertNull(materialization);
}

@Test
public void testRefreshAddsMVEnabledForRewrite() throws Exception {
// init the registry
HiveMaterializedViewsRegistry.get().refresh(Hive.get());

executeStatementOnDriver("create materialized view " + MV1 + " disabled rewrite as " +
"select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver);

Table mvTable = Hive.get().getTable(DB, MV1);
mvTable.setRewriteEnabled(true);

EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
Hive.get().alterTable(mvTable, false, environmentContext, true);

HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);

Assert.assertEquals(DB, materialization.qualifiedTableName.get(0));
Assert.assertEquals(MV1, materialization.qualifiedTableName.get(1));
}

@Test
public void testRefreshRemovesMVDoesNotExists() throws Exception {
// init the registry
HiveMaterializedViewsRegistry.get().refresh(Hive.get());

executeStatementOnDriver("create materialized view " + MV1 + " as " +
"select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver);

Hive.get().dropTable(DB, MV1);

HiveMaterializedViewsRegistry.get().refresh(Hive.get());

HiveRelOptMaterialization materialization =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(DB, MV1, RewriteAlgorithm.ALL);
Assert.assertNull(materialization);
}

private static CreateMaterializedViewDesc createMaterializedViewDesc() {
Map<String, String> tableProperties = new HashMap<>();
tableProperties.put("created_with_ctas", "true");

CreateMaterializedViewDesc createMaterializedViewDesc = new CreateMaterializedViewDesc(
MV1,
null,
null,
tableProperties,
null,
null,
null,
false,
true,
"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
null,
"org.apache.hadoop.hive.ql.io.orc.OrcSerde",
null,
new HashMap<>());

createMaterializedViewDesc.setViewOriginalText("select a,b,c from " + TABLE1 + " where a > 0 or a is null");
createMaterializedViewDesc.setViewExpandedText("select `t1`.`a`,`t1`.`b`,`t1`.`c` from `" + DB + "`.`" + TABLE1 + "` where `t1`.`a` > 0 or `t1`.`a` is null");
createMaterializedViewDesc.setSchema(
asList(new FieldSchema("a", "int", null),
new FieldSchema("b", "string", null),
new FieldSchema("c", "float", null)));

return createMaterializedViewDesc;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.txn.compactor;

import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData;
import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriverSilently;

public class TestQueryRewrite extends CompactorOnTezTest {

private static final String TABLE1 = "t1";
private static final String MV1 = "mat1";

private static final List<String> ORIGINAL_QUERY_PLAN = Arrays.asList(
"CBO PLAN:",
"HiveProject(a=[$0], b=[$1])",
" HiveFilter(condition=[>($0, 0)])",
" HiveTableScan(table=[[default, t1]], table:alias=[t1])",
""
);

@Override
public void setup() throws Exception {
super.setup();

executeStatementOnDriverSilently("drop materialized view if exists " + MV1, driver);
executeStatementOnDriverSilently("drop table if exists" + TABLE1 , driver);

executeStatementOnDriver("create table " + TABLE1 + "(a int, b string, c float) stored as orc TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("insert into " + TABLE1 + "(a,b, c) values (1, 'one', 1.1), (2, 'two', 2.2), (NULL, NULL, NULL)", driver);
executeStatementOnDriver("create materialized view " + MV1 + " stored by iceberg tblproperties('format-version'='2') as " +
"select a,b,c from " + TABLE1 + " where a > 0 or a is null", driver);
}

@Override
public void tearDown() {
executeStatementOnDriverSilently("drop materialized view " + MV1, driver);
executeStatementOnDriverSilently("drop table " + TABLE1 , driver);

super.tearDown();
}

@Test
public void testQueryIsNotRewrittenWhenMVIsDropped() throws Exception {

// Simulate a multi HS2 cluster.
// Drop the MV using a direct API call to HMS. This is similar what happening when the drop MV is executed by
// another HS2.
// In this case the MV is not removed from HiveMaterializedViewsRegistry of HS2 which runs the explain query.
msClient.dropTable("default", MV1);

List<String> result = execSelectAndDumpData("explain cbo select a, b from " + TABLE1 + " where a > 0", driver, "");
Assert.assertEquals(ORIGINAL_QUERY_PLAN, result);

}
}
3 changes: 2 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
Expand Down Expand Up @@ -2378,7 +2379,7 @@ public List<Table> getAllMaterializedViewObjectsForRewriting() throws HiveExcept
public List<HiveRelOptMaterialization> getMaterializedViewsByAST(
ASTNode astNode, Set<TableName> tablesUsed, Supplier<String> validTxnsList, HiveTxnManager txnMgr) throws HiveException {

List<HiveRelOptMaterialization> materializedViews =
Collection<HiveRelOptMaterialization> materializedViews =
HiveMaterializedViewsRegistry.get().getRewritingMaterializedViews(astNode);
if (materializedViews.isEmpty()) {
return Collections.emptyList();
Expand Down
Loading

0 comments on commit 3ce1cc6

Please sign in to comment.