Skip to content

Commit

Permalink
use yaml for table description instead yql (ydb-platform#11811)
Browse files Browse the repository at this point in the history
  • Loading branch information
iddqdex authored Nov 21, 2024
1 parent 6f7ecd5 commit b7cfb36
Show file tree
Hide file tree
Showing 44 changed files with 4,892 additions and 4,535 deletions.
127 changes: 84 additions & 43 deletions ydb/library/workload/benchmark_base/workload.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "workload.h"
#include <contrib/libs/fmt/include/fmt/format.h>
#include <ydb/public/api/protos/ydb_formats.pb.h>
#include <ydb/library/yaml_json/yaml_to_json.h>
#include <contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h>
#include <util/string/cast.h>
#include <util/system/spinlock.h>

Expand Down Expand Up @@ -33,52 +35,91 @@ const TString TWorkloadGeneratorBase::CsvFormatString = [] () {
return settings.SerializeAsString();
} ();

void TWorkloadGeneratorBase::GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const {
auto specialTypes = GetSpecialDataTypes();
specialTypes["string_type"] = Params.GetStringType();
specialTypes["date_type"] = Params.GetDateType();
specialTypes["timestamp_type"] = Params.GetTimestampType();

const auto& tableName = table["name"].GetString();
const auto path = Params.GetFullTableName(single ? nullptr : tableName.c_str());
result << Endl << "CREATE ";
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << "EXTERNAL ";
}
result << "TABLE `" << path << "` (" << Endl;
TVector<TStringBuilder> columns;
for (const auto& column: table["columns"].GetArray()) {
const auto& columnName = column["name"].GetString();
columns.emplace_back();
auto& so = columns.back();
so << " " << columnName << " ";
const auto& type = column["type"].GetString();
if (const auto* st = MapFindPtr(specialTypes, type)) {
so << *st;
} else {
so << type;
}
if (column["not_null"].GetBooleanSafe(false) && Params.GetStoreType() != TWorkloadBaseParams::EStoreType::Row) {
so << " NOT NULL";
}
}
result << JoinSeq(",\n", columns);
TVector<TStringBuf> keysV;
for (const auto& k: table["primary_key"].GetArray()) {
keysV.emplace_back(k.GetString());
}
const TString keys = JoinSeq(", ", keysV);
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << Endl;
} else {
result << "," << Endl << " PRIMARY KEY (" << keys << ")" << Endl;
}
result << ")" << Endl;

if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
result << "PARTITION BY HASH (" << keys << ")" << Endl;
}

result << "WITH (" << Endl;
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << " DATA_SOURCE = \""+ Params.GetFullTableName(nullptr) + "_s3_external_source\", FORMAT = \"parquet\", LOCATION = \"" << Params.GetS3Prefix()
<< "/" << (single ? TFsPath(Params.GetPath()).GetName() : (tableName + "/")) << "\"" << Endl;
} else {
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::Column) {
result << " STORE = COLUMN," << Endl;
}
result << " AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = " << table["partitioning"].GetUIntegerSafe(64) << Endl;
}
result << ");" << Endl;
}

std::string TWorkloadGeneratorBase::GetDDLQueries() const {
TString storageType = "-- ";
TString notNull = "";
TString createExternalDataSource;
TString external;
TString partitioning = "AUTO_PARTITIONING_MIN_PARTITIONS_COUNT";
TString primaryKey = ", PRIMARY KEY";
TString partitionBy = "-- ";
switch (Params.GetStoreType()) {
case TWorkloadBaseParams::EStoreType::Column:
storageType = "STORE = COLUMN, --";
notNull = "NOT NULL";
partitionBy = "PARTITION BY HASH";
break;
case TWorkloadBaseParams::EStoreType::ExternalS3: {
TString dataSourceName = Params.GetFullTableName(nullptr) + "_s3_external_source";
storageType = fmt::format(R"(DATA_SOURCE = "{}", FORMAT = "parquet", LOCATION = )", dataSourceName);
notNull = "NOT NULL";
createExternalDataSource = fmt::format(R"(
CREATE EXTERNAL DATA SOURCE `{}` WITH (
SOURCE_TYPE="ObjectStorage",
LOCATION="{}",
AUTH_METHOD="NONE"
);
)", dataSourceName, Params.GetS3Endpoint());
external = "EXTERNAL";
partitioning = "--";
primaryKey = "--";
const auto json = GetTablesJson();

TStringBuilder result;
result << "--!syntax_v1" << Endl;
if (Params.GetStoreType() == TWorkloadBaseParams::EStoreType::ExternalS3) {
result << "CREATE EXTERNAL DATA SOURCE `" << Params.GetFullTableName(nullptr) << "_s3_external_source` WITH (" << Endl
<< " SOURCE_TYPE=\"ObjectStorage\"," << Endl
<< " LOCATION=\"" << Params.GetS3Endpoint() << "\"," << Endl
<< " AUTH_METHOD=\"NONE\"" << Endl
<< ");" << Endl;
}
case TWorkloadBaseParams::EStoreType::Row:
break;

for (const auto& table: json["tables"].GetArray()) {
GenerateDDLForTable(result.Out, table, false);
}
if (json.Has("table")) {
GenerateDDLForTable(result.Out, json["table"], true);
}
auto createSql = DoGetDDLQueries();
SubstGlobal(createSql, "{createExternal}", createExternalDataSource);
SubstGlobal(createSql, "{external}", external);
SubstGlobal(createSql, "{notnull}", notNull);
SubstGlobal(createSql, "{partitioning}", partitioning);
SubstGlobal(createSql, "{path}", Params.GetFullTableName(nullptr));
SubstGlobal(createSql, "{primary_key}", primaryKey);
SubstGlobal(createSql, "{s3_prefix}", Params.GetS3Prefix());
SubstGlobal(createSql, "{store}", storageType);
SubstGlobal(createSql, "{partition_by}", partitionBy);
SubstGlobal(createSql, "{string_type}", Params.GetStringType());
SubstGlobal(createSql, "{date_type}", Params.GetDateType());
SubstGlobal(createSql, "{timestamp_type}", Params.GetTimestampType());
return createSql.c_str();
return result;
}

NJson::TJsonValue TWorkloadGeneratorBase::GetTablesJson() const {
const auto tablesYaml = GetTablesYaml();
const auto yaml = YAML::Load(tablesYaml.c_str());
return NKikimr::NYaml::Yaml2Json(yaml, true);
}

TVector<std::string> TWorkloadGeneratorBase::GetCleanPaths() const {
Expand Down
7 changes: 6 additions & 1 deletion ydb/library/workload/benchmark_base/workload.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "state.h"
#include <ydb/library/workload/abstract/workload_query_generator.h>
#include <ydb/library/accessor/accessor.h>
#include <library/cpp/json/json_value.h>
#include <util/generic/set.h>
#include <util/generic/deque.h>
#include <util/folder/path.h>
Expand Down Expand Up @@ -47,10 +48,14 @@ class TWorkloadGeneratorBase : public IWorkloadQueryGenerator {
static const TString PsvFormatString;

protected:
virtual TString DoGetDDLQueries() const = 0;
using TSpecialDataTypes = TMap<TString, TString>;
virtual TString GetTablesYaml() const = 0;
virtual TSpecialDataTypes GetSpecialDataTypes() const = 0;
NJson::TJsonValue GetTablesJson() const;

THolder<TGeneratorStateProcessor> StateProcessor;
private:
void GenerateDDLForTable(IOutputStream& result, const NJson::TJsonValue& table, bool single) const;
const TWorkloadBaseParams& Params;
};

Expand Down
1 change: 1 addition & 0 deletions ydb/library/workload/benchmark_base/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SRCS(
PEERDIR(
ydb/library/accessor
ydb/library/workload/abstract
ydb/library/yaml_json
ydb/public/api/protos
)

Expand Down
118 changes: 0 additions & 118 deletions ydb/library/workload/clickbench/click_bench_schema.sql

This file was deleted.

Loading

0 comments on commit b7cfb36

Please sign in to comment.