Skip to content

Commit

Permalink
Multicolumn sparsed test (ydb-platform#8284)
Browse files Browse the repository at this point in the history
  • Loading branch information
aavdonkin authored Aug 30, 2024
1 parent a261c7b commit 297d5b4
Show file tree
Hide file tree
Showing 7 changed files with 324 additions and 41 deletions.
11 changes: 7 additions & 4 deletions ydb/core/formats/arrow/simple_builder/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,27 +52,30 @@ class TSimpleArrayConstructor: public IArrayBuilder {
using TSelf = TSimpleArrayConstructor<TFiller>;
using TBuilder = typename arrow::TypeTraits<typename TFiller::TValue>::BuilderType;
const TFiller Filler;
ui32 ShiftValue = 0;

TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler)
TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler, ui32 shiftValue = 0)
: TBase(fieldName, nullable)
, Filler(filler)
, ShiftValue(shiftValue)
{
}
protected:
virtual std::shared_ptr<arrow::Array> DoBuildArray(const ui32 recordsCount) const override {
TBuilder fBuilder = TFillerBuilderConstructor<typename TFiller::TValue>::Construct();
Y_ABORT_UNLESS(fBuilder.Reserve(recordsCount).ok());
for (ui32 i = 0; i < recordsCount; ++i) {
Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i)).ok());
Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i + ShiftValue)).ok());
}
return *fBuilder.Finish();
}


public:
TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller())
TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller(), ui32 shiftValue = 0)
: TBase(fieldName)
, Filler(filler)
, ShiftValue(shiftValue)
{
}

Expand Down
63 changes: 63 additions & 0 deletions ydb/core/formats/arrow/simple_builder/filler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
#include <contrib/libs/apache/arrow/cpp/src/arrow/type.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/type_traits.h>
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/string_view.h>

#include <library/cpp/testing/unittest/registar.h>

#include <util/generic/string.h>
#include <util/system/types.h>
#include <util/random/random.h>

namespace NKikimr::NArrow::NConstruction {

Expand All @@ -25,6 +29,65 @@ class TIntSeqFiller {
}
};

class TStringType : public arrow::StringType {
public:
using c_type = TString;
};

template <class TArrowType>
class TPoolFiller {
private:
using CType = typename TArrowType::c_type;

private:
std::vector<CType> Data;

public:
using TValue = std::conditional_t<std::is_same_v<TArrowType, TStringType>, arrow::StringType, TArrowType>;
using ValueType = std::conditional_t<std::is_same_v<TArrowType, TStringType>, arrow::util::string_view, CType>;

static CType GetRandomNumberNotEqDef(CType defaultValue) {
CType result;
do {
result = RandomNumber<double>() * std::numeric_limits<CType>::max();
} while (result == defaultValue);
return result;
}

TPoolFiller(const ui32 poolSize, const CType defaultValue, const double defaultValueFrq) {
for (ui32 i = 0; i < poolSize; ++i) {
if (RandomNumber<double>() < defaultValueFrq) {
Data.emplace_back(defaultValue);
} else {
Data.emplace_back(GetRandomNumberNotEqDef(defaultValue));
}
}
}

TPoolFiller(const ui32 poolSize, const ui32 strLen, const TString& defaultValue, const double defaultValueFrq) {
for (ui32 i = 0; i < poolSize; ++i) {
if (RandomNumber<double>() < defaultValueFrq) {
Data.emplace_back(defaultValue);
} else {
Data.emplace_back(NUnitTest::RandomString(strLen, i));
}
}
}

template<class Type>
const ValueType Convert(const Type& v) const {
return v;
}

const ValueType Convert(const TString& str) const {
return arrow::util::string_view(str.data(), str.size());
}

ValueType GetValue(const ui32 idx) const {
return Convert(Data[(2 + 7 * idx) % Data.size()]);
}
};

template <class TArrowInt>
class TIntConstFiller {
public:
Expand Down
25 changes: 25 additions & 0 deletions ydb/core/kqp/ut/olap/helpers/typed_local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,31 @@ TString TTypedLocalHelper::GetTestTableSchema() const {
return result;
}

TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const {
TString result;
result += R"(
Columns { Name: "pk_int" Type: "Int64" NotNull: true }
Columns { Name: "ts" Type: "Timestamp" }
)";
for (ui32 i = 0; i < reps; i++) {
TString strNum = ToString(i);
result += "Columns {Name: \"field_utf" + strNum + "\" Type: \"Utf8\"}\n";
result += "Columns {Name: \"field_int" + strNum + "\" Type: \"Int64\"}\n";
result += "Columns {Name: \"field_uint" + strNum + "\" Type: \"Uint8\"}\n";
result += "Columns {Name: \"field_float" + strNum + "\" Type: \"Float\"}\n";
result += "Columns {Name: \"field_double" + strNum + "\" Type: \"Double\"}\n";
}
result += R"(
KeyColumnNames: "pk_int"
Engine: COLUMN_ENGINE_REPLACING_TIMESERIES
)";
return result;
}

void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) {
CreateSchemaOlapTableWithStore(GetMultiColumnTestTableSchema(reps), TableName, "olapStore", storeShardsCount, tableShardsCount);
}

void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const {
auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession();
auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync();
Expand Down
31 changes: 30 additions & 1 deletion ydb/core/kqp/ut/olap/helpers/typed_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include <ydb/public/sdk/cpp/client/ydb_types/status_codes.h>

#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_binary.h>

#include <library/cpp/json/writer/json_value.h>

namespace NKikimr::NKqp {
Expand Down Expand Up @@ -83,11 +85,38 @@ class TTypedLocalHelper: public Tests::NCS::THelper {
TBase::SendDataViaActorSystem(TablePath, batch);
}

void FillMultiColumnTable(ui32 repCount, const double pkKff = 0, const ui32 numRows = 800000) const {
const double frq = 0.9;
NArrow::NConstruction::TPoolFiller<arrow::Int64Type> int64Pool(1000, 0, frq);
NArrow::NConstruction::TPoolFiller<arrow::UInt8Type> uint8Pool(1000, 0, frq);
NArrow::NConstruction::TPoolFiller<arrow::FloatType> floatPool(1000, 0, frq);
NArrow::NConstruction::TPoolFiller<arrow::DoubleType> doublePool(1000, 0, frq);
NArrow::NConstruction::TPoolFiller<NKikimr::NArrow::NConstruction::TStringType> utfPool(1000, 52, "abcde", frq);

std::vector<NArrow::NConstruction::IArrayBuilder::TPtr> builders;
builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TIntSeqFiller<arrow::Int64Type>>::BuildNotNullable("pk_int", numRows * pkKff));
for (ui32 i = 0; i < repCount; i++) {
TString repStr = ToString(i);
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TPoolFiller<NKikimr::NArrow::NConstruction::TStringType>>>("field_utf" + repStr, utfPool, i));
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TPoolFiller<arrow::Int64Type>>>("field_int" + repStr, int64Pool, i));
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TPoolFiller<arrow::UInt8Type>>>("field_uint" + repStr, uint8Pool, i));
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TPoolFiller<arrow::FloatType>>>("field_float" + repStr, floatPool, i));
builders.emplace_back(std::make_shared<NArrow::NConstruction::TSimpleArrayConstructor<NArrow::NConstruction::TPoolFiller<arrow::DoubleType>>>("field_double" + repStr, doublePool, i));
}
NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders);
std::shared_ptr<arrow::RecordBatch> batch = batchBuilder.BuildBatch(numRows);
TBase::SendDataViaActorSystem(TablePath, batch);
}


void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const;

void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) {
CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount);
}

TString GetMultiColumnTestTableSchema(ui32 reps) const;
void CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount = 4, ui32 tableShardsCount = 3);
};

}
}
Loading

0 comments on commit 297d5b4

Please sign in to comment.