diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index fb2e136c4807..0112b9524270 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -28,7 +28,19 @@ bool Contains(const auto& names, std::string_view str) { return std::find(std::begin(names), std::end(names), str) != std::end(names); } -constexpr std::string_view ImplTables[] = {ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable}; +constexpr std::string_view ImplTables[] = { + ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable, +}; + +constexpr std::string_view GlobalSecondaryImplTables[] = { + ImplTable, +}; +static_assert(std::is_sorted(std::begin(GlobalSecondaryImplTables), std::end(GlobalSecondaryImplTables))); + +constexpr std::string_view GlobalKMeansTreeImplTables[] = { + NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable, +}; +static_assert(std::is_sorted(std::begin(GlobalKMeansTreeImplTables), std::end(GlobalKMeansTreeImplTables))); } @@ -142,11 +154,11 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn return true; } -TVector GetImplTables(NKikimrSchemeOp::EIndexType indexType) { +std::span GetImplTables(NKikimrSchemeOp::EIndexType indexType) { if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { - return { NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable }; + return GlobalKMeansTreeImplTables; } else { - return { ImplTable }; + return GlobalSecondaryImplTables; } } diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index 90b48c40f3c1..75d1a21aabdd 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -7,6 +7,9 @@ #include #include +#include +#include + namespace NKikimr::NTableIndex { struct TTableColumns { @@ -24,7 +27,7 @@ inline constexpr const char* ImplTable = "indexImplTable"; bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain); TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index); -TVector GetImplTables(NKikimrSchemeOp::EIndexType indexType); +std::span GetImplTables(NKikimrSchemeOp::EIndexType indexType); bool IsImplTable(std::string_view tableName); bool IsBuildImplTable(std::string_view tableName); diff --git a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp index db2f711816ae..2abdf0d1866f 100644 --- a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp +++ b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp @@ -2,6 +2,7 @@ #include "actors/kqp_ic_gateway_actors.h" #include +#include #include #include #include @@ -175,7 +176,7 @@ TTableMetadataResult GetTableMetadataResult(const NSchemeCache::TSchemeCacheNavi THashMap sequences; for (const auto& sequenceDesc : entry.Sequences) { - sequences[sequenceDesc.GetName()] = + sequences[sequenceDesc.GetName()] = NYql::TKikimrPathId(sequenceDesc.GetPathId().GetOwnerId(), sequenceDesc.GetPathId().GetLocalId()); } @@ -187,7 +188,7 @@ TTableMetadataResult GetTableMetadataResult(const NSchemeCache::TSchemeCacheNavi const TString typeName = GetTypeName(NScheme::TTypeInfoMod{columnDesc.PType, columnDesc.PTypeMod}); auto defaultKind = NKikimrKqp::TKqpColumnMetadataProto::DEFAULT_KIND_UNSPECIFIED; NYql::TKikimrPathId defaultFromSequencePathId = {}; - + if (columnDesc.IsDefaultFromSequence()) { defaultKind = NKikimrKqp::TKqpColumnMetadataProto::DEFAULT_KIND_SEQUENCE; auto sequenceIt = sequences.find(columnDesc.DefaultFromSequence); @@ -196,7 +197,7 @@ TTableMetadataResult GetTableMetadataResult(const NSchemeCache::TSchemeCacheNavi } else if (columnDesc.IsDefaultFromLiteral()) { defaultKind = NKikimrKqp::TKqpColumnMetadataProto::DEFAULT_KIND_LITERAL; } - + tableMeta->Columns.emplace( columnDesc.Name, NYql::TKikimrColumnMetadata( @@ -400,11 +401,15 @@ TString GetDebugString(const std::pair& id) { return TStringBuilder() << " Path: " << id.second << " TableId: " << id.first; } -void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr ptr, size_t idx, const TTableMetadataResult& value) { - if (value.Success()) { - ptr->SecondaryGlobalIndexMetadata[idx] = value.Metadata; +void UpdateMetadataIfSuccess(NYql::TKikimrTableMetadataPtr& implTable, TTableMetadataResult& value) { + YQL_ENSURE(value.Success()); + if (!implTable) { + implTable = std::move(value.Metadata); + return; } - + YQL_ENSURE(!implTable->Next); + YQL_ENSURE(implTable->Name < value.Metadata->Name); + implTable->Next = std::move(value.Metadata); } void SetError(TTableMetadataResult& externalDataSourceMetadata, const TString& error) { @@ -618,28 +623,21 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadIndexMeta const auto& tableName = tableMetadata->Name; const size_t indexesCount = tableMetadata->Indexes.size(); - TVector> children; + TVector> children; children.reserve(indexesCount); - tableMetadata->SecondaryGlobalIndexMetadata.resize(indexesCount); const ui64 tableOwnerId = tableMetadata->PathId.OwnerId(); for (size_t i = 0; i < indexesCount; i++) { const auto& index = tableMetadata->Indexes[i]; - const auto indexTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index.Type, index.Name); - for (const auto& indexTablePath : indexTablePaths) { + const auto implTablePaths = NSchemeHelpers::CreateIndexTablePath(tableName, index.Type, index.Name); + for (const auto& implTablePath : implTablePaths) { if (!index.SchemaVersion) { LOG_DEBUG_S(*ActorSystem, NKikimrServices::KQP_GATEWAY, "Load index metadata without schema version check index: " << index.Name); children.push_back( - LoadTableMetadata(cluster, indexTablePath, + LoadTableMetadata(cluster, implTablePath, TLoadTableMetadataSettings().WithPrivateTables(true), database, userToken) - .Apply([i, tableMetadata](const TFuture& result) { - auto value = result.GetValue(); - UpdateMetadataIfSuccess(tableMetadata, i, value); - return static_cast(value); - }) ); - } else { LOG_DEBUG_S(*ActorSystem, NKikimrServices::KQP_GATEWAY, "Load index metadata with schema version check" << "index: " << index.Name @@ -650,12 +648,7 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadIndexMeta auto ownerId = index.PathOwnerId ? index.PathOwnerId : tableOwnerId; //for compat with 20-2 children.push_back( LoadIndexMetadataByPathId(cluster, - NKikimr::TIndexId(ownerId, index.LocalPathId, index.SchemaVersion), indexTablePath, database, userToken) - .Apply([i, tableMetadata](const TFuture& result) { - auto value = result.GetValue(); - UpdateMetadataIfSuccess(tableMetadata, i, value); - return static_cast(value); - }) + NKikimr::TIndexId(ownerId, index.LocalPathId, index.SchemaVersion), implTablePath, database, userToken) ); } @@ -666,14 +659,26 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadIndexMeta auto loadIndexMetadataChecker = [ptr, result{std::move(loadTableMetadataResult)}, children](const NThreading::TFuture) mutable { bool loadOk = true; - for (const auto& child : children) { - result.AddIssues(child.GetValue().Issues()); - if (!child.GetValue().Success()) { - loadOk = false; + + const auto indexesCount = result.Metadata->Indexes.size(); + result.Metadata->ImplTables.resize(indexesCount); + auto it = children.begin(); + for (size_t i = 0; i < indexesCount; i++) { + for (const auto& _ : NTableIndex::GetImplTables(NYql::TIndexDescription::ConvertIndexType( + result.Metadata->Indexes[i].Type))) { + YQL_ENSURE(it != children.end()); + auto value = it++->ExtractValue(); + result.AddIssues(value.Issues()); + if (loadOk && (loadOk = value.Success())) { + UpdateMetadataIfSuccess(result.Metadata->ImplTables[i], value); + } } } + YQL_ENSURE(it == children.end()); + auto locked = ptr.lock(); if (!loadOk || !locked) { + result.Metadata->ImplTables.clear(); result.SetStatus(TIssuesIds::KIKIMR_INDEX_METADATA_LOAD_FAILED); } else { locked->OnLoadedTableMetadata(result); @@ -909,6 +914,9 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadTableMeta case EKind::KindIndex: { Y_ENSURE(entry.ListNodeEntry, "expected children list"); for (const auto& child : entry.ListNodeEntry->Children) { + if (!table.EndsWith(child.Name)) { + continue; + } TIndexId pathId = TIndexId(child.PathId, child.SchemaVersion); LoadTableMetadataCache(cluster, std::make_pair(pathId, table), settings, database, userToken) @@ -916,6 +924,7 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadTableMeta { promise.SetValue(result.GetValue()); }); + break; } break; } diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index c560104ab571..80b7331c30c3 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -246,7 +246,7 @@ TStatus AnnotateReadTable(const TExprNode::TPtr& node, TExprContext& ctx, const TKikimrTableMetadataPtr meta; if (readIndex) { - meta = table.second->Metadata->GetIndexMetadata(TString(node->Child(TKqlReadTableIndex::idx_Index)->Content())).first; + meta = table.second->Metadata->GetIndexMetadata(node->Child(TKqlReadTableIndex::idx_Index)->Content()).first; if (!meta) { return TStatus::Error; } @@ -455,7 +455,7 @@ TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, cons if (isStreamLookup && !EnsureArgsCount(*node, TKqlStreamLookupIndex::Match(node.Get()) ? 5 : 4, ctx)) { return TStatus::Error; } - + if (!isStreamLookup && !EnsureArgsCount(*node, TKqlLookupIndexBase::Match(node.Get()) ? 4 : 3, ctx)) { return TStatus::Error; } @@ -560,7 +560,7 @@ TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, cons if (!EnsureAtom(*index, ctx)) { return TStatus::Error; } - auto indexMeta = table.second->Metadata->GetIndexMetadata(TString(index->Content())).first; + auto indexMeta = table.second->Metadata->GetIndexMetadata(index->Content()).first; if (!CalcKeyColumnsCount(ctx, node->Pos(), *structType, *table.second, *indexMeta, keyColumnsCount)) { return TStatus::Error; @@ -713,7 +713,7 @@ TStatus AnnotateUpsertRows(const TExprNode::TPtr& node, TExprContext& ctx, const } if (TKqlUpsertRowsIndex::Match(node.Get())) { - Y_ENSURE(!table.second->Metadata->SecondaryGlobalIndexMetadata.empty()); + Y_ENSURE(!table.second->Metadata->ImplTables.empty()); } auto effectType = MakeKqpEffectType(ctx); @@ -1683,7 +1683,7 @@ TStatus AnnotateStreamLookupConnection(const TExprNode::TPtr& node, TExprContext } else if (settings.Strategy == EStreamLookupStrategyType::LookupJoinRows || settings.Strategy == EStreamLookupStrategyType::LookupSemiJoinRows) { - + if (!EnsureTupleType(node->Pos(), *inputItemType, ctx)) { return TStatus::Error; } @@ -1820,7 +1820,7 @@ TStatus AnnotateIndexLookupJoin(const TExprNode::TPtr& node, TExprContext& ctx) } else { node->SetTypeAnn(ctx.MakeType(outputRowType)); } - + return TStatus::Ok; } diff --git a/ydb/core/kqp/opt/kqp_opt_kql.cpp b/ydb/core/kqp/opt/kqp_opt_kql.cpp index d201edaf0285..5aa56c8c93f0 100644 --- a/ydb/core/kqp/opt/kqp_opt_kql.cpp +++ b/ydb/core/kqp/opt/kqp_opt_kql.cpp @@ -79,7 +79,7 @@ std::pair CreateRowsToReplace(const TExprBase& input, bool HasIndexesToWrite(const TKikimrTableDescription& tableData) { bool hasIndexesToWrite = false; - YQL_ENSURE(tableData.Metadata->Indexes.size() == tableData.Metadata->SecondaryGlobalIndexMetadata.size()); + YQL_ENSURE(tableData.Metadata->Indexes.size() == tableData.Metadata->ImplTables.size()); for (const auto& index : tableData.Metadata->Indexes) { if (index.ItUsedForWrite()) { hasIndexesToWrite = true; @@ -893,7 +893,7 @@ TIntrusivePtr GetIndexMetadata(const TKqlReadTableIndex& r const TKikimrTablesData& tables, TStringBuf cluster) { const auto& tableDesc = GetTableData(tables, cluster, read.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(read.Index().StringValue()); + const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(read.Index().Value()); return indexMeta; } diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index f84d747611f4..0bf72b718ba1 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -52,7 +52,7 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo auto keyColumns = inputStats->KeyColumns; if (auto indexRead = inputNode.Maybe()) { const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexRead.Cast().Table().Path().Value()); - const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().StringValue()); + const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().Value()); keyColumns = TIntrusivePtr( new TOptimizerStatistics::TKeyColumns(indexMeta->KeyColumnNames)); @@ -90,15 +90,15 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo double byteSize = nRows * sizePerRow * (nAttrs / (double)inputStats->Ncols); auto stats = std::make_shared( - EStatisticsType::BaseTable, - nRows, - nAttrs, - byteSize, - 0.0, + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + 0.0, keyColumns, inputStats->ColumnStatistics, inputStats->StorageType); - stats->SortColumns = sortedPrefixPtr; + stats->SortColumns = sortedPrefixPtr; YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table" << stats->ToString(); @@ -177,11 +177,11 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon /** * Infer statistic for Kqp steam lookup operator - * + * * In reality we want to compute the number of rows and cost that the lookyup actually performed. * But currently we just take the data from the base table, and join cardinality will still work correctly, * because it considers joins on PK. - * + * * In the future it would be better to compute the actual cardinality */ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { @@ -198,17 +198,17 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation auto byteSize = tableStats->ByteSize * (nAttrs / (double) tableStats->Ncols) * inputStats->Selectivity; auto res = std::make_shared( - EStatisticsType::BaseTable, - inputStats->Nrows, - nAttrs, - byteSize, - 0, + EStatisticsType::BaseTable, + inputStats->Nrows, + nAttrs, + byteSize, + 0, inputStats->KeyColumns, inputStats->ColumnStatistics, inputStats->StorageType); res->SortColumns = inputStats->SortColumns; - typeCtx->SetStats(input.Get(), res); + typeCtx->SetStats(input.Get(), res); } @@ -265,7 +265,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn auto keyColumns = inputStats->KeyColumns; if (auto indexRead = inputNode.Maybe()) { const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexRead.Cast().Table().Path().Value()); - const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().StringValue()); + const auto& [indexMeta, _] = tableData.Metadata->GetIndexMetadata(indexRead.Cast().Index().Value()); keyColumns = TIntrusivePtr( new TOptimizerStatistics::TKeyColumns(indexMeta->KeyColumnNames)); @@ -294,19 +294,19 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn double cost = inputStats->Cost; auto outputStats = std::make_shared( - EStatisticsType::BaseTable, - nRows, - nAttrs, - byteSize, - cost, - keyColumns, + EStatisticsType::BaseTable, + nRows, + nAttrs, + byteSize, + cost, + keyColumns, inputStats->ColumnStatistics, inputStats->StorageType); outputStats->SortColumns = std::move(sortedPrefixPtr); YQL_CLOG(TRACE, CoreDq) << "Infer statistics for source settings: " << outputStats->ToString(); - typeCtx->SetStats(input.Get(), outputStats); + typeCtx->SetStats(input.Get(), outputStats); } /** @@ -334,7 +334,7 @@ void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeA if (!inputStats) { return; } - + TVector indexColumns; for (auto c : indexRanges.Columns()) { indexColumns.push_back(c.StringValue()); @@ -360,11 +360,11 @@ void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeA sortedPrefixPtr = TIntrusivePtr(new TOptimizerStatistics::TSortColumns(sortedPrefixCols, sortedPrefixAliases)); } auto stats = std::make_shared( - inputStats->Type, + inputStats->Type, inputStats->Nrows, - inputStats->Ncols, - inputStats->ByteSize, - inputStats->Cost, + inputStats->Ncols, + inputStats->ByteSize, + inputStats->Cost, indexColumnsPtr, inputStats->ColumnStatistics, inputStats->StorageType); @@ -418,7 +418,7 @@ void InferStatisticsForLookupJoin(const TExprNode::TPtr& input, TTypeAnnotationC /*** * Infer statistics for result binding of a stage */ -void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, +void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, TVector>>& txStats) { auto inputNode = TExprBase(input); @@ -478,12 +478,12 @@ class TKqpOlapPredicateSelectivityComputer: public TPredicateSelectivityComputer TExprContext dummyCtx; TPositionHandle dummyPos; - auto rowArg = + auto rowArg = Build(dummyCtx, dummyPos) .Name("row") .Done(); - auto member = + auto member = Build(dummyCtx, dummyPos) .Struct(rowArg) .Name().Build(attr) @@ -678,7 +678,7 @@ double EstimateRowSize(const TStructExprType& rowType, const TString& format, co } else if (compression == "xz") { compressionRatio *= 1.45; } - result /= compressionRatio; + result /= compressionRatio; } } @@ -745,7 +745,7 @@ void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotatio if (stats->Ncols == 0 || stats->Ncols > static_cast(rowType->GetSize()) || stats->Nrows == 0 || stats->ByteSize == 0.0 || stats->Cost == 0.0) { auto newSpecific = std::make_shared(*specific); - + auto sortColumns = stats->SortColumns; stats = std::make_shared(stats->Type, stats->Nrows, stats->Ncols, stats->ByteSize, stats->Cost, stats->KeyColumns, stats->ColumnStatistics, stats->StorageType, newSpecific); stats->SortColumns = std::move(sortColumns); @@ -789,7 +789,7 @@ void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotatio * When encountering a KqpPhysicalTx, we save the results of the stage in a vector * where it can later be accessed via binding parameters */ -void AppendTxStats(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, +void AppendTxStats(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, TVector>>& txStats) { auto inputNode = TExprBase(input); @@ -854,7 +854,7 @@ bool TKqpStatisticsTransformer::BeforeLambdasSpecific(const TExprNode::TPtr& inp } else if(TDqSourceWrapBase::Match(input.Get())) { InferStatisticsForDqSourceWrap(input, TypeCtx, KqpCtx); - } + } else if (TKqpOlapFilter::Match(input.Get())) { InferStatisticsForOlapFilter(input, TypeCtx); } diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_helpers.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_helpers.cpp index 703e71bdfcaa..b27e778a23c8 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_helpers.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_helpers.cpp @@ -98,7 +98,7 @@ TMaybe RewriteReadToPrefixLookup(TKqlReadTableBase read, TExprCon // we don't need to make filter for point selection if (!(prefixSize == from.ArgCount() && - prefixSize == to.ArgCount() && + prefixSize == to.ArgCount() && from.template Maybe() && to.template Maybe())) { @@ -158,7 +158,7 @@ TMaybe RewriteReadToPrefixLookup(TKqlReadTableRangesBase read, TE if (auto indexRead = read.template Maybe()) { const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, read.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().StringValue()); + const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(indexRead.Index().Cast().Value()); lookupTable = indexMeta->Name; indexName = indexRead.Cast().Index().StringValue(); } else { diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp index 865e4809ac73..700f360c9d2c 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -15,8 +17,8 @@ using namespace NYql::NNodes; namespace { -TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionHandle pos, TExprContext& ctx) { - TSet columnsToSelect(table.Metadata->KeyColumnNames.begin(), table.Metadata->KeyColumnNames.end()); +TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionHandle pos, TExprContext& ctx, + const auto& columnsToSelect) { TVector columnsList; columnsList.reserve(columnsToSelect.size()); for (auto column : columnsToSelect) { @@ -32,6 +34,10 @@ TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionH .Done(); } +TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionHandle pos, TExprContext& ctx) { + return BuildKeyColumnsList(table, pos, ctx, table.Metadata->KeyColumnNames); +} + TCoAtomList MergeColumns(const NNodes::TCoAtomList& col1, const TVector& col2, TExprContext& ctx) { TMap columns; for (const auto& c : col1) { @@ -139,6 +145,65 @@ bool CanPushTopSort(const TCoTopBase& node, const TKikimrTableDescription& index return IsTableExistsKeySelector(node.KeySelectorLambda(), indexDesc, columns); } +bool CanUseVectorIndex(const TIndexDescription& indexDesc, const TExprBase& lambdaBody, const TCoTopBase& top) { + Y_ASSERT(indexDesc.Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree); + // TODO(mbkkt) We need to account top.Count(), but not clear what to if it's value is runtime? + auto checkMember = [&] (const TExprBase& expr) { + auto member = expr.Maybe(); + return member && member.Cast().Name().Value() == indexDesc.KeyColumns[0]; + }; + auto checkUdf = [&] (const TExprBase& expr, bool checkMembers) { + auto apply = expr.Maybe(); + if (!apply || apply.Cast().Args().Count() != 3) { + return false; + } + if (checkMembers) { + auto args = apply.Cast().Args(); + if (absl::c_none_of(args, [&] (const TExprBase& expr) { return checkMember(expr); })) { + return false; + } + } + auto udf = apply.Cast().Callable().Maybe(); + if (!udf) { + return false; + } + auto directions = top.SortDirections().Maybe(); + if (!directions) { + return false; + } + const bool asc = directions.Cast().Literal().Value() == "true"; + const auto methodName = udf.Cast().MethodName().Value(); + auto& desc = std::get(indexDesc.SpecializedIndexDescription); + switch (desc.settings().settings().metric()) { + case Ydb::Table::VectorIndexSettings::SIMILARITY_INNER_PRODUCT: + return !asc && methodName == "Knn.InnerProductSimilarity"; + case Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE: + case Ydb::Table::VectorIndexSettings::DISTANCE_COSINE: + if (asc) { + return methodName == "Knn.CosineDistance"; + } else { + return methodName == "Knn.CosineSimilarity"; + } + case Ydb::Table::VectorIndexSettings::DISTANCE_MANHATTAN: + return asc && methodName == "Knn.ManhattanDistance"; + case Ydb::Table::VectorIndexSettings::DISTANCE_EUCLIDEAN: + return asc && methodName == "Knn.EuclideanDistance"; + default: + Y_UNREACHABLE(); + } + }; + auto flatMap = lambdaBody.Maybe(); + if (!flatMap) { + return checkUdf(lambdaBody, true); + } + auto flatMapInput = flatMap.Cast().Input(); + if (!checkMember(flatMapInput)) { + return false; + } + auto flatMapLambdaBody = flatMap.Cast().Lambda().Body(); + return checkUdf(flatMapLambdaBody, false); +} + struct TReadMatch { TMaybeNode Read; TMaybeNode ReadRanges; @@ -258,7 +323,7 @@ TExprBase DoRewriteIndexRead(const TReadMatch& read, TExprContext& ctx, const bool needDataRead = CheckIndexCovering(read, indexMeta); if (read.FullScan()) { - TString indexName = read.Index().StringValue(); + const auto indexName = read.Index().StringValue(); auto issue = TIssue(ctx.GetPosition(read.Pos()), "Given predicate is not suitable for used index: " + indexName); SetIssueCode(EYqlIssueCode::TIssuesIds_EIssueCode_KIKIMR_WRONG_INDEX_USAGE, issue); ctx.AddWarning(issue); @@ -334,15 +399,182 @@ TExprBase DoRewriteIndexRead(const TReadMatch& read, TExprContext& ctx, } } +TExprBase DoRewriteTopSortOverKMeansTree( + const TReadMatch& read, const TMaybeNode& flatMap, const TExprNode& lambdaArgs, const TExprBase& lambdaBody, const TCoTopBase& top, + TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, + const TKikimrTableDescription& tableDesc, const TIndexDescription& indexDesc, const TKikimrTableMetadata& implTable) +{ + Y_ASSERT(indexDesc.Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree); + const auto* levelTableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, implTable.Name); + const auto* postingTableDesc = &kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, implTable.Next->Name); + YQL_ENSURE(!implTable.Next->Next); + YQL_ENSURE(levelTableDesc->Metadata->Name.EndsWith(NTableIndex::NTableVectorKmeansTreeIndex::LevelTable)); + YQL_ENSURE(postingTableDesc->Metadata->Name.EndsWith(NTableIndex::NTableVectorKmeansTreeIndex::PostingTable)); + + // TODO(mbkkt) It's kind of strange that almost everything here have same position + const auto pos = read.Pos(); + + auto levelTable = BuildTableMeta(*levelTableDesc->Metadata, pos, ctx); + auto postingTable = BuildTableMeta(*postingTableDesc->Metadata, pos, ctx); + auto mainTable = BuildTableMeta(*tableDesc.Metadata, pos, ctx); + + auto levelColumns = BuildKeyColumnsList(*levelTableDesc, pos, ctx, + std::initializer_list{NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn}); + auto postingColumns = BuildKeyColumnsList(*postingTableDesc, pos, ctx, tableDesc.Metadata->KeyColumnNames); + const auto& mainColumns = read.Columns(); + + auto mapArg = Build(ctx, pos) + .Name("mapArg") + .Done(); + TVector mapMembers{ + Build(ctx, pos) + .Name().Build(NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn) + .Value().Struct(mapArg) + .Name().Build(NTableIndex::NTableVectorKmeansTreeIndex::IdColumn) + .Build() + .Done() + }; + + // TODO(mbkkt) How to inline construction of these constants to construction of readLevel0? + auto fromValues = ctx.Builder(pos) + .Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal() + .Build(); + auto toValues = ctx.Builder(pos) + .Callable("Uint32").Atom(0, "1", TNodeFlags::Default).Seal() + .Build(); + + // TODO(mbkkt) count should be customizable via query options + auto count = ctx.Builder(pos) + .Callable("Uint64").Atom(0, "2", TNodeFlags::Default).Seal() + .Build(); + + // TODO(mbkkt) Is it best way to do `SELECT FROM levelTable WHERE first_pk_column = 0`? + auto readLevel0 = Build(ctx, pos) + .Table(levelTable) + .Range() + .From() + .Add(fromValues) + .Build() + .To() + .Add(toValues) + .Build() + .Build() + .Columns(levelColumns) + .Settings(read.Settings()) + .Done(); + + auto levelLambda = [&] { + const auto oldArgNodes = lambdaArgs.Children(); + TNodeOnNodeOwnedMap replaces(oldArgNodes.size()); + TExprNode::TListType newArgNodes; + newArgNodes.reserve(oldArgNodes.size()); + for (const auto& arg : oldArgNodes) { + auto newArg = ctx.ShallowCopy(*arg); + YQL_ENSURE(replaces.emplace(arg.Get(), newArg).second); + newArgNodes.emplace_back(std::move(newArg)); + } + auto newLambda = TExprBase{ctx.NewLambda(pos, + ctx.NewArguments(pos, std::move(newArgNodes)), + ctx.ReplaceNodes(TExprNode::TListType{lambdaBody.Ptr()}, replaces))} + .Cast(); + auto args = newLambda.Args().Ptr(); + replaces.clear(); + auto flatMap = newLambda.Body().Maybe(); + if (!flatMap) { + auto apply = newLambda.Body().Cast(); + for (auto arg : apply.Args()) { + auto oldMember = arg.Maybe(); + if (oldMember && oldMember.Cast().Name().Value() == indexDesc.KeyColumns[0]) { + auto newMember = Build(ctx, pos) + .Name().Build(NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn) + .Struct(oldMember.Cast().Struct()) + .Done(); + replaces.emplace(oldMember.Raw(), newMember.Ptr()); + break; + } + } + return ctx.NewLambda(pos, + std::move(args), + ctx.ReplaceNodes(TExprNode::TListType{apply.Ptr()}, replaces)); + } + auto apply = flatMap.Cast().Lambda().Body().Cast(); + for (auto arg : apply.Args()) { + if (arg.Ref().Type() == NYql::TExprNode::Argument) { + auto oldMember = flatMap.Cast().Input().Cast(); + auto newMember = Build(ctx, pos) + .Name().Build(NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn) + .Struct(oldMember.Struct()) + .Done(); + replaces.emplace(arg.Raw(), newMember.Ptr()); + break; + } + } + return ctx.NewLambda(pos, + std::move(args), + ctx.ReplaceNodes(TExprNode::TListType{apply.Ptr()}, replaces)); + }(); + + auto topLevel0 = Build(ctx, pos) + .Input(readLevel0) + // TODO(mbkkt) how to construct our own lambda? + // Maybe good idea is construct lambda with knn udf and two member access as arguments + // and then replace one of them to argument without access to indexed field... + .KeySelectorLambda(levelLambda) + .SortDirections(top.SortDirections()) + .Count(count) + .Done(); + + auto mapLevel0 = Build(ctx, pos) + .Input(topLevel0) + .Lambda() + .Args({mapArg}) + .Body().Add(mapMembers).Build() + .Build() + .Done(); + + auto postingRead = Build(ctx, pos) + .Table(postingTable) + .LookupKeys(mapLevel0) + .Columns(postingColumns) + .Done(); + + auto mainRead = Build(ctx, pos) + .Table(mainTable) + .LookupKeys(postingRead) + .Columns(mainColumns) + .Done().Ptr(); + + if (flatMap) { + mainRead = Build(ctx, flatMap.Cast().Pos()) + .Input(mainRead) + .Lambda(ctx.DeepCopyLambda(flatMap.Cast().Lambda().Ref())) + .Done().Ptr(); + } + + auto mainTop = Build(ctx, top.Pos()) + .CallableName(top.Ref().Content()) + .Input(mainRead) + .KeySelectorLambda(ctx.DeepCopyLambda(top.KeySelectorLambda().Ref())) + .SortDirections(top.SortDirections()) + .Count(top.Count()) + .Done(); + + return mainTop; +} + } // namespace TExprBase KqpRewriteIndexRead(const TExprBase& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { if (auto indexRead = TReadMatch::Match(node)) { const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, indexRead.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(TString(indexRead.Index().Value())); + const auto indexName = indexRead.Index().Value(); + auto [implTable, indexDesc] = tableDesc.Metadata->GetIndex(indexName); + // TODO(mbkkt) instead of ensure should be warning and main table read? + YQL_ENSURE(indexDesc->Type != TIndexDescription::EType::GlobalSyncVectorKMeansTree, + "index read doesn't support vector index: " << indexName); - return DoRewriteIndexRead(indexRead, ctx, tableDesc, indexMeta, kqpCtx.IsScanQuery(), {}); + return DoRewriteIndexRead(indexRead, ctx, tableDesc, implTable, kqpCtx.IsScanQuery(), {}); } return node; @@ -358,16 +590,20 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const auto lookupIndex = maybeLookupIndex.Cast(); const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, lookupIndex.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(lookupIndex.Index().StringValue()); + const auto indexName = lookupIndex.Index().Value(); + auto [implTable, indexDesc] = tableDesc.Metadata->GetIndex(indexName); + // TODO(mbkkt) instead of ensure should be warning and main table lookup? + YQL_ENSURE(indexDesc->Type != TIndexDescription::EType::GlobalSyncVectorKMeansTree, + "lookup doesn't support vector index: " << indexName); - const bool needDataRead = CheckIndexCovering(lookupIndex, indexMeta); + const bool needDataRead = CheckIndexCovering(lookupIndex, implTable); if (!needDataRead) { if (kqpCtx.Config->EnableKqpDataQueryStreamLookup) { TKqpStreamLookupSettings settings; settings.Strategy = EStreamLookupStrategyType::LookupRows; return Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(lookupIndex.LookupKeys()) .Columns(lookupIndex.Columns()) .Settings(settings.BuildNode(ctx, node.Pos())) @@ -375,7 +611,7 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const } return Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(lookupIndex.LookupKeys()) .Columns(lookupIndex.Columns()) .Done(); @@ -387,7 +623,7 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const TKqpStreamLookupSettings settings; settings.Strategy = EStreamLookupStrategyType::LookupRows; TExprBase lookupIndexTable = Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(lookupIndex.LookupKeys()) .Columns(keyColumnsList) .Settings(settings.BuildNode(ctx, node.Pos())) @@ -402,7 +638,7 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const } TExprBase lookupIndexTable = Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(lookupIndex.LookupKeys()) .Columns(keyColumnsList) .Done(); @@ -426,12 +662,16 @@ TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx, auto settings = TKqpStreamLookupSettings::Parse(streamLookupIndex); const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, streamLookupIndex.Table().Path()); - const auto& [indexMeta, _] = tableDesc.Metadata->GetIndexMetadata(streamLookupIndex.Index().StringValue()); + const auto indexName = streamLookupIndex.Index().Value(); + auto [implTable, indexDesc] = tableDesc.Metadata->GetIndex(indexName); + // TODO(mbkkt) instead of ensure should be warning and main table lookup? + YQL_ENSURE(indexDesc->Type != TIndexDescription::EType::GlobalSyncVectorKMeansTree, + "stream lookup doesn't support vector index: " << indexName); - const bool needDataRead = CheckIndexCovering(streamLookupIndex, indexMeta); + const bool needDataRead = CheckIndexCovering(streamLookupIndex, implTable); if (!needDataRead) { return Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(streamLookupIndex.LookupKeys()) .Columns(streamLookupIndex.Columns()) .Settings(streamLookupIndex.Settings()) @@ -441,7 +681,7 @@ TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx, auto keyColumnsList = BuildKeyColumnsList(tableDesc, streamLookupIndex.Pos(), ctx); TExprBase lookupIndexTable = Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .Table(BuildTableMeta(*implTable, node.Pos(), ctx)) .LookupKeys(streamLookupIndex.LookupKeys()) .Columns(keyColumnsList) .Settings(streamLookupIndex.Settings()) @@ -465,8 +705,8 @@ TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx, .Tuple("tuple") .Index().Value("0").Build() .Build() - .Build() - .Build() + .Build() + .Build() .Done(); } else { lookupKeys = lookupIndexTable; @@ -656,19 +896,72 @@ TExprBase KqpRewriteTopSortOverIndexRead(const TExprBase& node, TExprContext& ct return node; const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, readTableIndex.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(TString(readTableIndex.Index().Value())); - const auto& indexDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexMeta->Name); + const auto indexName = readTableIndex.Index().Value(); + auto [implTable, indexDesc] = tableDesc.Metadata->GetIndex(indexName); + if (indexDesc->Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree) { + const auto* lambdaArgs = topBase.KeySelectorLambda().Args().Raw(); + auto lambdaBody = topBase.KeySelectorLambda().Body(); + bool canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase); + if (!canUseVectorIndex) { + auto argument = lambdaBody.Maybe().Struct().Maybe(); + if (!argument) { + // TODO(mbkkt) some warnings? + return node; + } + auto asStruct = maybeFlatMap.Lambda().Body().Maybe().Input().Maybe(); + if (!asStruct) { + // TODO(mbkkt) some warnings? + return node; + } + + // TODO(mbkkt) I think it shouldn't matter, but for my paranoia I will keep it for now + // In general I want to check that result of flat map used as argument for member access in top lambda + const auto argumentName = argument.Cast().Name(); + if (absl::c_none_of(maybeFlatMap.Cast().Lambda().Args(), + [&](const TCoArgument& argument) { return argumentName == argument.Name(); })) { + // TODO(mbkkt) some warnings? + return node; + } + + const auto memberName = lambdaBody.Cast().Name().Value(); + for (const auto& arg : asStruct.Cast().Args()) { + if (!arg->IsList()) { + continue; + } + auto argChildren = arg->Children(); + if (argChildren.size() != 2) { + continue; + } + auto atom = TExprBase{argChildren[0].Get()}.Maybe(); + if (!atom || atom.Cast().Value() != memberName) { + continue; + } + lambdaBody = TExprBase{argChildren[1]}; + canUseVectorIndex = CanUseVectorIndex(*indexDesc, lambdaBody, topBase); + break; + } + if (!canUseVectorIndex) { + // TODO(mbkkt) some warnings? + return node; + } + lambdaArgs = maybeFlatMap.Cast().Lambda().Args().Raw(); + } + return DoRewriteTopSortOverKMeansTree(readTableIndex, maybeFlatMap, *lambdaArgs, lambdaBody, topBase, + ctx, kqpCtx, tableDesc, *indexDesc, *implTable); + } + const auto& implTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, implTable->Name); + YQL_ENSURE(implTableDesc.Metadata->Name.EndsWith(NTableIndex::ImplTable)); TVector extraColumns; - if (maybeFlatMap && !CanPushFlatMap(maybeFlatMap.Cast(), indexDesc, parentsMap, extraColumns)) + if (maybeFlatMap && !CanPushFlatMap(maybeFlatMap.Cast(), implTableDesc, parentsMap, extraColumns)) return node; - if (!CanPushTopSort(topBase, indexDesc, &extraColumns)) { + if (!CanPushTopSort(topBase, implTableDesc, &extraColumns)) { return node; } - bool needSort = node.Maybe() && !IsKeySelectorPkPrefix(topBase.KeySelectorLambda(), indexDesc); + bool needSort = node.Maybe() && !IsKeySelectorPkPrefix(topBase.KeySelectorLambda(), implTableDesc); auto filter = [&](const TExprBase& in) mutable { auto sortInput = in; @@ -692,7 +985,7 @@ TExprBase KqpRewriteTopSortOverIndexRead(const TExprBase& node, TExprContext& ct return TExprBase(newTop); }; - auto lookup = DoRewriteIndexRead(readTableIndex, ctx, tableDesc, indexMeta, + auto lookup = DoRewriteIndexRead(readTableIndex, ctx, tableDesc, implTable, kqpCtx.IsScanQuery(), extraColumns, filter); return Build(ctx, node.Pos()) @@ -720,11 +1013,16 @@ TExprBase KqpRewriteTakeOverIndexRead(const TExprBase& node, TExprContext& ctx, return node; const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, readTableIndex.Table().Path()); - const auto& [indexMeta, _ ] = tableDesc.Metadata->GetIndexMetadata(TString(readTableIndex.Index().Value())); - const auto& indexDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, indexMeta->Name); + const auto indexName = readTableIndex.Index().Value(); + auto [implTable, indexDesc] = tableDesc.Metadata->GetIndex(indexName); + if (indexDesc->Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree) { + // TODO(mbkkt) some warning? + return node; + } + const auto& implTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, implTable->Name); TVector extraColumns; - if (maybeFlatMap && !CanPushFlatMap(maybeFlatMap.Cast(), indexDesc, parentsMap, extraColumns)) + if (maybeFlatMap && !CanPushFlatMap(maybeFlatMap.Cast(), implTableDesc, parentsMap, extraColumns)) return node; auto filter = [&](const TExprBase& in) mutable { @@ -742,7 +1040,7 @@ TExprBase KqpRewriteTakeOverIndexRead(const TExprBase& node, TExprContext& ctx, return TExprBase(ctx.ChangeChild(*node.Ptr(), 0, takeChild.Ptr())); }; - return DoRewriteIndexRead(readTableIndex, ctx, tableDesc, indexMeta, kqpCtx.IsScanQuery(), extraColumns, filter); + return DoRewriteIndexRead(readTableIndex, ctx, tableDesc, implTable, kqpCtx.IsScanQuery(), extraColumns, filter); } } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index c655760ce415..b9eb6226eadd 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -183,7 +183,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false, mainTableDesc); for (auto& index : mainTableDesc.Metadata->Indexes) { if (index.Type != TIndexDescription::EType::GlobalAsync && index.State == TIndexDescription::EIndexState::Ready) { - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(index.Name)).first->Name); + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(index.Name).first->Name); bool uselessIndex = true; for (size_t i = 0; i < mainTableDesc.Metadata->KeyColumnNames.size(); ++i) { @@ -223,7 +223,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx } } - auto& tableDesc = indexName ? kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(indexName.Cast())).first->Name) : mainTableDesc; + auto& tableDesc = indexName ? kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(indexName.Cast()).first->Name) : mainTableDesc; auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx, typesCtx); @@ -444,4 +444,3 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx } } // namespace NKikimr::NKqp::NOpt - diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_indexes.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_indexes.cpp index 891ae7164183..d05c11585f62 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_indexes.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_indexes.cpp @@ -83,18 +83,17 @@ TVector> BuildSecondaryInde { TVector> secondaryIndexes; secondaryIndexes.reserve(table.Metadata->Indexes.size()); - YQL_ENSURE(table.Metadata->Indexes.size() == table.Metadata->SecondaryGlobalIndexMetadata.size()); + YQL_ENSURE(table.Metadata->Indexes.size() == table.Metadata->ImplTables.size()); for (size_t i = 0; i < table.Metadata->Indexes.size(); i++) { - const auto& indexMeta = table.Metadata->Indexes[i]; - - if (!indexMeta.ItUsedForWrite()) { + const auto& index = table.Metadata->Indexes[i]; + if (!index.ItUsedForWrite()) { continue; } // Add index if filter absent bool addIndex = filter ? false : true; - for (const auto& col : indexMeta.KeyColumns) { + for (const auto& col : index.KeyColumns) { if (filter) { // Add index if filter and at least one column present in the filter @@ -102,7 +101,7 @@ TVector> BuildSecondaryInde } } - for (const auto& col : indexMeta.DataColumns) { + for (const auto& col : index.DataColumns) { if (filter) { // Add index if filter and at least one column present in the filter @@ -110,9 +109,11 @@ TVector> BuildSecondaryInde } } - if (indexMeta.KeyColumns && addIndex) { - auto indexTable = tableBuilder(*table.Metadata->SecondaryGlobalIndexMetadata[i], pos, ctx).Ptr(); - secondaryIndexes.emplace_back(std::make_pair(indexTable, &indexMeta)); + if (index.KeyColumns && addIndex) { + auto& implTable = table.Metadata->ImplTables[i]; + YQL_ENSURE(!implTable->Next); + auto indexTable = tableBuilder(*implTable, pos, ctx).Ptr(); + secondaryIndexes.emplace_back(indexTable, &index); } } return secondaryIndexes; diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_uniq_helper.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_uniq_helper.cpp index 5ccf8ae7d300..dbaf852e4e9c 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_uniq_helper.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_uniq_helper.cpp @@ -57,8 +57,9 @@ class TInsertUniqBuildHelper : public TUniqBuildHelper { if (indexId == -1) { meta = &mainTableMeta; } else { - YQL_ENSURE((size_t)indexId < mainTableMeta.SecondaryGlobalIndexMetadata.size()); - meta = mainTableMeta.SecondaryGlobalIndexMetadata[indexId].Get(); + YQL_ENSURE((size_t)indexId < mainTableMeta.ImplTables.size()); + meta = mainTableMeta.ImplTables[indexId].Get(); + YQL_ENSURE(!meta->Next); } auto inputs = Build(ctx, pos) @@ -138,8 +139,9 @@ class TUpsertUniqBuildHelper : public TUniqBuildHelper { if (indexId == -1) { meta = &mainTableMeta; } else { - YQL_ENSURE((size_t)indexId < mainTableMeta.SecondaryGlobalIndexMetadata.size()); - meta = mainTableMeta.SecondaryGlobalIndexMetadata[indexId].Get(); + YQL_ENSURE((size_t)indexId < mainTableMeta.ImplTables.size()); + meta = mainTableMeta.ImplTables[indexId].Get(); + YQL_ENSURE(!meta->Next); } TVector inputs; diff --git a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp index 2d9460526410..2725e87f18cc 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp @@ -336,12 +336,16 @@ class TKiSourceLoadTableMetadataTransformer : public TGraphTransformerBase { tableDesc->Metadata = res.Metadata; bool sysColumnsEnabled = SessionCtx->Config().SystemColumnsEnabled(); - YQL_ENSURE(res.Metadata->Indexes.size() == res.Metadata->SecondaryGlobalIndexMetadata.size()); - for (const auto& indexMeta : res.Metadata->SecondaryGlobalIndexMetadata) { - YQL_ENSURE(indexMeta); - auto& desc = SessionCtx->Tables().GetOrAddTable(indexMeta->Cluster, SessionCtx->GetDatabase(), indexMeta->Name); - desc.Metadata = indexMeta; - desc.Load(ctx, sysColumnsEnabled); + YQL_ENSURE(res.Metadata->Indexes.size() == res.Metadata->ImplTables.size()); + for (auto implTable : res.Metadata->ImplTables) { + YQL_ENSURE(implTable); + do { + auto nextImplTable = implTable->Next; + auto& desc = SessionCtx->Tables().GetOrAddTable(implTable->Cluster, SessionCtx->GetDatabase(), implTable->Name); + desc.Metadata = std::move(implTable); + desc.Load(ctx, sysColumnsEnabled); + implTable = std::move(nextImplTable); + } while (implTable); } if (!tableDesc->Load(ctx, sysColumnsEnabled)) { diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index a86e2cc419cc..3f807a2aee69 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -468,6 +468,8 @@ struct TViewPersistedData { }; struct TKikimrTableMetadata : public TThrRefBase { + TIntrusivePtr Next; + bool DoesExist = false; TString Cluster; TString Name; @@ -494,9 +496,9 @@ struct TKikimrTableMetadata : public TThrRefBase { TVector KeyColumnNames; TVector ColumnOrder; - // Indexes and SecondaryGlobalIndexMetadata must be in same order + // Indexes and ImplTables must be in same order TVector Indexes; - TVector> SecondaryGlobalIndexMetadata; + TVector> ImplTables; TVector ColumnFamilies; TTableSettings TableSettings; @@ -536,13 +538,23 @@ struct TKikimrTableMetadata : public TThrRefBase { orderMap.emplace(col.GetId(), col.GetName()); } - Indexes.reserve(message->GetIndexes().size()); - for(auto& index: message->GetIndexes()) - Indexes.push_back(TIndexDescription(&index)); + const auto indexesCount = message->GetIndexes().size(); + Indexes.reserve(indexesCount); + for(auto& index: message->GetIndexes()) { + Indexes.emplace_back(&index); + } - SecondaryGlobalIndexMetadata.reserve(message->GetSecondaryGlobalIndexMetadata().size()); - for(auto& sgim: message->GetSecondaryGlobalIndexMetadata()) - SecondaryGlobalIndexMetadata.push_back(MakeIntrusive(&sgim)); + auto it = message->GetSecondaryGlobalIndexMetadata().begin(); + ImplTables.reserve(indexesCount); + for(int i = 0; i < indexesCount; ++i) { + YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end()); + auto& implTable = ImplTables.emplace_back(MakeIntrusive(&*it++)); + if (Indexes[i].Type == TIndexDescription::EType::GlobalSyncVectorKMeansTree) { + YQL_ENSURE(it != message->GetSecondaryGlobalIndexMetadata().end()); + implTable->Next = MakeIntrusive(&*it++); + } + } + YQL_ENSURE(it == message->GetSecondaryGlobalIndexMetadata().end()); ColumnOrder.reserve(Columns.size()); for(auto& [_, name]: orderMap) { @@ -608,8 +620,12 @@ struct TKikimrTableMetadata : public TThrRefBase { index.ToMessage(message->AddIndexes()); } - for(auto& IndexTableMetadata: SecondaryGlobalIndexMetadata) { - IndexTableMetadata->ToMessage(message->AddSecondaryGlobalIndexMetadata()); + for(auto implTable: ImplTables) { + YQL_ENSURE(implTable); + do { + implTable->ToMessage(message->AddSecondaryGlobalIndexMetadata()); + implTable = implTable->Next; + } while (implTable); } } @@ -619,17 +635,22 @@ struct TKikimrTableMetadata : public TThrRefBase { return proto.SerializeAsString(); } - std::pair, TIndexDescription::EIndexState> GetIndexMetadata(const TString& indexName) const { + std::pair, const TIndexDescription*> GetIndex(std::string_view indexName) const { YQL_ENSURE(Indexes.size(), "GetIndexMetadata called for table without indexes"); - YQL_ENSURE(Indexes.size() == SecondaryGlobalIndexMetadata.size(), "index metadata has not been loaded yet"); + YQL_ENSURE(Indexes.size() == ImplTables.size(), "index metadata has not been loaded yet"); for (size_t i = 0; i < Indexes.size(); i++) { if (Indexes[i].Name == indexName) { - auto metadata = SecondaryGlobalIndexMetadata[i]; - YQL_ENSURE(metadata, "unexpected empty metadata for index " << indexName); - return {metadata, Indexes[i].State}; + auto implTable = ImplTables[i]; + YQL_ENSURE(implTable, "unexpected empty metadata for index " << indexName); + return {std::move(implTable), &Indexes[i]}; } } - return {nullptr, TIndexDescription::EIndexState::Invalid}; + return {nullptr, nullptr}; + } + + std::pair, TIndexDescription::EIndexState> GetIndexMetadata(std::string_view indexName) const { + auto [implTable, index] = GetIndex(indexName); + return {std::move(implTable), index ? index->State : TIndexDescription::EIndexState::Invalid}; } bool IsOlap() const { diff --git a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp index d47cf694d396..acfd2f1d3b77 100644 --- a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp @@ -137,8 +137,6 @@ struct TKiExploreTxResults { YQL_ENSURE(indexIt != tableMeta->Indexes.end(), "Index not found"); const auto indexTablePaths = NKikimr::NKqp::NSchemeHelpers::CreateIndexTablePath(tableMeta->Name, indexIt->Type, indexName); - YQL_ENSURE(indexTablePaths.size() == 1, "Only index with one impl table is supported"); - const auto indexTablePath = indexTablePaths[0]; THashSet indexColumns; indexColumns.reserve(indexIt->KeyColumns.size() + indexIt->DataColumns.size()); @@ -158,7 +156,13 @@ struct TKiExploreTxResults { } } - uncommittedChangesRead = HasWriteOps(indexTablePath) || (needMainTableRead && HasWriteOps(tableMeta->Name)); + uncommittedChangesRead = needMainTableRead && HasWriteOps(tableMeta->Name); + for (auto& indexTablePath : indexTablePaths) { + if (uncommittedChangesRead) { + break; + } + uncommittedChangesRead = HasWriteOps(indexTablePath); + } } else { uncommittedChangesRead = HasWriteOps(tableMeta->Name); } diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.cpp b/ydb/core/kqp/provider/yql_kikimr_provider.cpp index f645dce579b4..022e98a7d8ea 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_provider.cpp @@ -843,13 +843,14 @@ void TableDescriptionToTableInfoImpl(const TKikimrTableDescription& desc, TYdbOp continue; } - const auto& idxTableDesc = desc.Metadata->SecondaryGlobalIndexMetadata[idxNo]; + const auto& implTable = *desc.Metadata->ImplTables[idxNo]; + YQL_ENSURE(!implTable.Next); auto info = NKqpProto::TKqpTableInfo(); - info.SetTableName(idxTableDesc->Name); - info.MutableTableId()->SetOwnerId(idxTableDesc->PathId.OwnerId()); - info.MutableTableId()->SetTableId(idxTableDesc->PathId.TableId()); - info.SetSchemaVersion(idxTableDesc->SchemaVersion); + info.SetTableName(implTable.Name); + info.MutableTableId()->SetOwnerId(implTable.PathId.OwnerId()); + info.MutableTableId()->SetTableId(implTable.PathId.TableId()); + info.SetSchemaVersion(implTable.SchemaVersion); back_inserter = std::move(info); ++back_inserter; diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index 67e8d3c0f0d7..f7f6267ee323 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -16,6 +16,8 @@ #include +#include + namespace NKikimr { namespace NKqp { @@ -2140,6 +2142,255 @@ Y_UNIT_TEST_SUITE(KqpIndexes) { } } + void DoPositiveQueriesVectorIndex(TSession& session, const TString& query) { + { + auto result = session.ExplainDataQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), + "Failed to explain: `" << query << "` with " << result.GetIssues().ToString()); + } + { + auto result = session.ExecuteDataQuery(query, + TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx() + ).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), + "Failed to execute: `" << query << "` with " << result.GetIssues().ToString()); + } + } + + void DoPositiveQueriesVectorIndex(TSession& session, const TString& mainQuery, const TString& indexQuery) { + DoPositiveQueriesVectorIndex(session, mainQuery); + DoPositiveQueriesVectorIndex(session, indexQuery); + } + + void DoPositiveQueriesVectorIndexOrderBy( + TSession& session, + std::string_view function, + std::string_view direction, + std::string_view left, + std::string_view right) { + constexpr std::string_view target = "$target = \"\x67\x73\x03\";"; + std::string metric = std::format("Knn::{}({}, {})", function, left, right); + // no metric in result + { + const TString plainQuery(Q1_(std::format(R"({} + SELECT * FROM `/Root/TestTable` + ORDER BY {} {} + LIMIT 3; + )", target, metric, direction))); + const TString indexQuery(Q1_(std::format(R"({} + SELECT * FROM `/Root/TestTable` VIEW index + ORDER BY {} {} + LIMIT 3; + )", target, metric, direction))); + DoPositiveQueriesVectorIndex(session, plainQuery, indexQuery); + } + // metric in result + { + const TString plainQuery(Q1_(std::format(R"({} + SELECT {}, `/Root/TestTable`.* FROM `/Root/TestTable` + ORDER BY {} {} + LIMIT 3; + )", target, metric, metric, direction))); + const TString indexQuery(Q1_(std::format(R"({} + SELECT {}, `/Root/TestTable`.* FROM `/Root/TestTable` VIEW index + ORDER BY {} {} + LIMIT 3; + )", target, metric, metric, direction))); + DoPositiveQueriesVectorIndex(session, plainQuery, indexQuery); + } + // metric as result + { + const TString plainQuery(Q1_(std::format(R"({} + SELECT {} AS m, `/Root/TestTable`.* FROM `/Root/TestTable` + ORDER BY m {} + LIMIT 3; + )", target, metric, direction))); + const TString indexQuery(Q1_(std::format(R"({} + SELECT {} AS m, `/Root/TestTable`.* FROM `/Root/TestTable` VIEW index + ORDER BY m {} + LIMIT 3; + )", target, metric, direction))); + DoPositiveQueriesVectorIndex(session, plainQuery, indexQuery); + } + } + + void DoPositiveQueriesVectorIndexOrderBy( + TSession& session, + std::string_view function, + std::string_view direction) { + // target is left, member is right + DoPositiveQueriesVectorIndexOrderBy(session, function, direction, "$target", "emb"); + // target is right, member is left + DoPositiveQueriesVectorIndexOrderBy(session, function, direction, "emb", "$target"); + } + + void DoPositiveQueriesVectorIndexOrderByCosine(TSession& session) { + // distance, default direction + DoPositiveQueriesVectorIndexOrderBy(session, "CosineDistance", ""); + // distance, asc direction + DoPositiveQueriesVectorIndexOrderBy(session, "CosineDistance", "ASC"); + // similarity, desc direction + DoPositiveQueriesVectorIndexOrderBy(session, "CosineSimilarity", "DESC"); + } + + TSession DoCreateTableForVectorIndex(TTableClient& db, bool nullable) { + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto tableBuilder = db.GetTableBuilder(); + if (nullable) { + tableBuilder + .AddNullableColumn("pk", EPrimitiveType::Int64) + .AddNullableColumn("emb", EPrimitiveType::String) + .AddNullableColumn("data", EPrimitiveType::String); + } else { + tableBuilder + .AddNonNullableColumn("pk", EPrimitiveType::Int64) + .AddNonNullableColumn("emb", EPrimitiveType::String) + .AddNonNullableColumn("data", EPrimitiveType::String); + } + tableBuilder.SetPrimaryKeyColumns(TVector{"pk"}); + auto result = session.CreateTable("/Root/TestTable", tableBuilder.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + const TString query1(Q_(R"( + UPSERT INTO `/Root/TestTable` (pk, emb, data) VALUES)" + "(0, \"\x03\x30\x03\", \"0\")," + "(1, \"\x13\x31\x03\", \"1\")," + "(2, \"\x23\x32\x03\", \"2\")," + "(3, \"\x33\x33\x03\", \"3\")," + "(4, \"\x43\x34\x03\", \"4\")," + "(5, \"\x60\x60\x03\", \"5\")," + "(6, \"\x61\x61\x03\", \"6\")," + "(7, \"\x62\x62\x03\", \"7\")," + "(8, \"\x75\x76\x03\", \"8\")," + "(9, \"\x76\x76\x03\", \"9\");" + )); + + auto result = session.ExecuteDataQuery( + query1, + TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()) + .ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + return session; + } + + Y_UNIT_TEST(VectorIndexOrderByCosineDistance) { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableVectorIndex(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetFeatureFlags(featureFlags) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetTableClient(); + auto session = DoCreateTableForVectorIndex(db, false); + { + const TString createIndex(Q_(R"( + ALTER TABLE `/Root/TestTable` + ADD INDEX index + GLOBAL USING vector_kmeans_tree + ON (emb) + WITH (distance=cosine, vector_type="uint8", vector_dimension=2, levels=1, clusters=2); + )")); + + auto result = session.ExecuteSchemeQuery(createIndex) + .ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + DoPositiveQueriesVectorIndexOrderByCosine(session); + } + + Y_UNIT_TEST(VectorIndexOrderByCosineSimilarity) { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableVectorIndex(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetFeatureFlags(featureFlags) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetTableClient(); + auto session = DoCreateTableForVectorIndex(db, false); + { + const TString createIndex(Q_(R"( + ALTER TABLE `/Root/TestTable` + ADD INDEX index + GLOBAL USING vector_kmeans_tree + ON (emb) + WITH (similarity=cosine, vector_type="uint8", vector_dimension=2, levels=1, clusters=2); + )")); + + auto result = session.ExecuteSchemeQuery(createIndex) + .ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + DoPositiveQueriesVectorIndexOrderByCosine(session); + } + + Y_UNIT_TEST(VectorIndexOrderByCosineDistanceNullable) { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableVectorIndex(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetFeatureFlags(featureFlags) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetTableClient(); + auto session = DoCreateTableForVectorIndex(db, true); + { + const TString createIndex(Q_(R"( + ALTER TABLE `/Root/TestTable` + ADD INDEX index + GLOBAL USING vector_kmeans_tree + ON (emb) + WITH (distance=cosine, vector_type="uint8", vector_dimension=2, levels=1, clusters=2); + )")); + + auto result = session.ExecuteSchemeQuery(createIndex) + .ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + DoPositiveQueriesVectorIndexOrderByCosine(session); + } + + Y_UNIT_TEST(VectorIndexOrderByCosineSimilarityNullable) { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableVectorIndex(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetFeatureFlags(featureFlags) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetTableClient(); + auto session = DoCreateTableForVectorIndex(db, true); + { + const TString createIndex(Q_(R"( + ALTER TABLE `/Root/TestTable` + ADD INDEX index + GLOBAL USING vector_kmeans_tree + ON (emb) + WITH (similarity=cosine, vector_type="uint8", vector_dimension=2, levels=1, clusters=2); + )")); + + auto result = session.ExecuteSchemeQuery(createIndex) + .ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + DoPositiveQueriesVectorIndexOrderByCosine(session); + } + Y_UNIT_TEST(ExplainCollectFullDiagnostics) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() diff --git a/ydb/core/kqp/ut/indexes/ya.make b/ydb/core/kqp/ut/indexes/ya.make index 3d7e53ae7f2a..c058edef3f41 100644 --- a/ydb/core/kqp/ut/indexes/ya.make +++ b/ydb/core/kqp/ut/indexes/ya.make @@ -20,6 +20,7 @@ PEERDIR( ydb/core/kqp ydb/core/kqp/ut/common ydb/library/yql/providers/common/http_gateway + ydb/library/yql/udfs/common/knn yql/essentials/sql/pg_dummy ) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp index 53e884e7cc5f..de0a37e86b2d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_indexed_table.cpp @@ -284,7 +284,6 @@ TVector CreateIndexedTable(TOperationId nextId, const TTxTr userLevelDesc = indexDescription.GetIndexImplTableDescriptions(0); userPostingDesc = indexDescription.GetIndexImplTableDescriptions(1); } - result.push_back(createIndexImplTable(CalcVectorKmeansTreeLevelImplTableDesc(baseTableDescription.GetPartitionConfig(), userLevelDesc))); result.push_back(createIndexImplTable(CalcVectorKmeansTreePostingImplTableDesc(baseTableDescription, baseTableDescription.GetPartitionConfig(), implTableColumns, userPostingDesc))); } else { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index b8958e4468c8..3619f41dbd42 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -686,8 +686,9 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil .Dive(buildInfo.IndexName) .Dive(NTableIndex::NTableVectorKmeansTreeIndex::LevelTable); Y_ASSERT(buildInfo.Sample.Rows.size() <= buildInfo.KMeans.K); - auto actor = new TUploadSampleK(path.PathString(), buildInfo.Limits, Self->SelfId(), ui64(BuildId), - buildInfo.Sample.Rows, buildInfo.KMeans.ChildBegin); + auto actor = new TUploadSampleK(path.PathString(), + buildInfo.Limits, Self->SelfId(), ui64(BuildId), + buildInfo.Sample.Rows, buildInfo.KMeans.ChildBegin); TActivationContext::AsActorContext().MakeFor(Self->SelfId()).Register(actor); buildInfo.Sample.Sent = true; diff --git a/ydb/tools/query_replay/query_compiler.cpp b/ydb/tools/query_replay/query_compiler.cpp index fc613017a7dc..aecf7c44c907 100644 --- a/ydb/tools/query_replay/query_compiler.cpp +++ b/ydb/tools/query_replay/query_compiler.cpp @@ -109,8 +109,13 @@ struct TMetadataInfoHolder { : TableMetadata(tableMetadata) { for (auto& [name, ptr] : TableMetadata) { - for (auto& secondary : ptr->SecondaryGlobalIndexMetadata) { - Indexes.emplace(secondary->Name, secondary); + for (auto implTable : ptr->ImplTables) { + YQL_ENSURE(implTable); + do { + auto nextImplTable = implTable->Next; + Indexes.emplace(implTable->Name, std::move(implTable)); + implTable = std::move(nextImplTable); + } while (implTable); } } } diff --git a/ydb/tools/query_replay_yt/query_compiler.cpp b/ydb/tools/query_replay_yt/query_compiler.cpp index 8430b30d0670..7d81e5a0513a 100644 --- a/ydb/tools/query_replay_yt/query_compiler.cpp +++ b/ydb/tools/query_replay_yt/query_compiler.cpp @@ -105,8 +105,13 @@ struct TMetadataInfoHolder { : TableMetadata(tableMetadata) { for (auto& [name, ptr] : TableMetadata) { - for (auto& secondary : ptr->SecondaryGlobalIndexMetadata) { - Indexes.emplace(secondary->Name, secondary); + for (auto implTable : ptr->ImplTables) { + YQL_ENSURE(implTable); + do { + auto nextImplTable = implTable->Next; + Indexes.emplace(implTable->Name, std::move(implTable)); + implTable = std::move(nextImplTable); + } while (implTable); } } }