diff --git a/src/core/algorithms/cfd/cfd_discovery.cpp b/src/core/algorithms/cfd/cfd_discovery.cpp index 4f706dfd70..eed967746f 100644 --- a/src/core/algorithms/cfd/cfd_discovery.cpp +++ b/src/core/algorithms/cfd/cfd_discovery.cpp @@ -40,8 +40,8 @@ void CFDDiscovery::RegisterOptions() { DESBORDANTE_OPTION_USING; RegisterOption(config::kTableOpt(&input_table_)); - RegisterOption(Option{&columns_number_, kCfdColumnsNumber, kDCfdColumnsNumber, 0u}); - RegisterOption(Option{&tuples_number_, kCfdTuplesNumber, kDCfdTuplesNumber, 0u}); + RegisterOption(Option{&columns_number_, kCfdColumnsNumber, kDCfdColumnsNumber, 0ul}); + RegisterOption(Option{&tuples_number_, kCfdTuplesNumber, kDCfdTuplesNumber, 0ul}); } int CFDDiscovery::NrCfds() const { diff --git a/src/core/algorithms/cfd/cfd_discovery.h b/src/core/algorithms/cfd/cfd_discovery.h index 7c9373e733..7dd16c6ded 100644 --- a/src/core/algorithms/cfd/cfd_discovery.h +++ b/src/core/algorithms/cfd/cfd_discovery.h @@ -26,8 +26,8 @@ class CFDDiscovery : public Algorithm { protected: config::InputTable input_table_; - unsigned columns_number_; - unsigned tuples_number_; + size_t columns_number_; + size_t tuples_number_; ItemsetCFDList cfd_list_; std::shared_ptr relation_; diff --git a/src/core/algorithms/cfd/model/cfd_relation_data.cpp b/src/core/algorithms/cfd/model/cfd_relation_data.cpp index 84928e92e9..f9f4ab7944 100644 --- a/src/core/algorithms/cfd/model/cfd_relation_data.cpp +++ b/src/core/algorithms/cfd/model/cfd_relation_data.cpp @@ -23,7 +23,7 @@ void CFDRelationData::AddNewItemsInFullTable(ItemDictionary& item_dictionary, std::vector const& string_row, std::vector& int_row, std::vector& data_rows, - int& unique_elems_number, unsigned num_columns) { + int& unique_elems_number, size_t num_columns) { int it; for (size_t i = 0; i < num_columns; i++) { auto ptr = item_dictionary.find(std::make_pair(i, string_row[i])); @@ -42,24 +42,29 @@ void CFDRelationData::AddNewItemsInFullTable(ItemDictionary& item_dictionary, } std::unique_ptr CFDRelationData::CreateFrom(model::IDatasetStream& parser, - unsigned columns_number, - unsigned tuples_number, - double c_sample, double r_sample) { + size_t columns_number, + size_t tuples_number, double c_sample, + double r_sample) { if (columns_number == 0 || tuples_number == 0) { return CFDRelationData::CreateFrom(parser, c_sample, r_sample); } + size_t const num_columns = std::min(parser.GetNumberOfColumns(), columns_number); + std::vector column_names; + column_names.reserve(num_columns); + for (AttributeIndex i = 0; static_cast(i) < num_columns; ++i) { + column_names.push_back(parser.GetColumnName(i)); + } // Fields of CFDRelationData class - auto schema = std::make_unique(parser.GetRelationName()); + auto schema = + std::make_unique(parser.GetRelationName(), std::move(column_names)); std::vector data_rows; ItemDictionary item_dictionary; std::vector items; ColumnesValuesDict columns_values_dict; int unique_elems_number = 1; - unsigned num_columns = parser.GetNumberOfColumns(); std::vector line; - num_columns = std::min(num_columns, columns_number); std::vector string_row(num_columns); while (parser.HasNextRow() && data_rows.size() < tuples_number) { line = parser.GetNextRow(); @@ -74,11 +79,8 @@ std::unique_ptr CFDRelationData::CreateFrom(model::IDatasetStre std::vector column_data; for (AttributeIndex i = 0; static_cast(i) < num_columns; ++i) { - auto column = Column(schema.get(), parser.GetColumnName(i), i); - schema->AppendColumn(std::move(column)); - column_data.emplace_back(schema->GetColumn(i), columns_values_dict[i]); + column_data.emplace_back(&schema->GetColumn(i), columns_values_dict[i]); } - schema->Init(); return std::make_unique(std::move(schema), std::move(column_data), std::move(data_rows), std::move(item_dictionary), @@ -120,7 +122,7 @@ void CFDRelationData::AddNewItemsInPartialTable(ItemDictionary& item_dictionary, std::unique_ptr CFDRelationData::CreateFrom(model::IDatasetStream& file_input, double c_sample, double r_sample) { // Fields of CFDRelationData class - auto schema = std::make_unique(file_input.GetRelationName()); + auto schema = RelationalSchema::CreateFrom(file_input); std::vector data_rows; ItemDictionary item_dictionary; std::vector items; @@ -149,11 +151,8 @@ std::unique_ptr CFDRelationData::CreateFrom(model::IDatasetStre std::vector column_data; for (AttributeIndex i = 0; i < num_columns; ++i) { - auto column = Column(schema.get(), file_input.GetColumnName(i), i); - schema->AppendColumn(std::move(column)); - column_data.emplace_back(schema->GetColumn(i), columns_values_dict[i]); + column_data.emplace_back(&schema->GetColumn(i), columns_values_dict[i]); } - schema->Init(); return std::make_unique(std::move(schema), std::move(column_data), std::move(data_rows), std::move(item_dictionary), std::move(items)); @@ -194,7 +193,7 @@ std::vector const& CFDRelationData::GetDomain(unsigned attr) const { } std::string CFDRelationData::GetAttrName(int index) const { - return GetSchema()->GetColumn(index)->GetName(); + return GetSchema()->GetColumn(index).GetName(); } int CFDRelationData::GetAttr(std::string const& s) const { diff --git a/src/core/algorithms/cfd/model/cfd_relation_data.h b/src/core/algorithms/cfd/model/cfd_relation_data.h index 67617406b3..acac49edc9 100644 --- a/src/core/algorithms/cfd/model/cfd_relation_data.h +++ b/src/core/algorithms/cfd/model/cfd_relation_data.h @@ -42,7 +42,7 @@ class CFDRelationData : public AbstractRelationData { static void AddNewItemsInFullTable(ItemDictionary &, ColumnesValuesDict &, std::vector &, std::vector const &, std::vector &, std::vector &, int &, - unsigned); + size_t); static void AddNewItemsInPartialTable(ItemDictionary &, ColumnesValuesDict &, std::vector &, std::vector const &, @@ -74,9 +74,8 @@ class CFDRelationData : public AbstractRelationData { static std::unique_ptr CreateFrom(model::IDatasetStream &file_input, double c_sample = 1, double r_sample = 1); static std::unique_ptr CreateFrom(model::IDatasetStream &file_input, - unsigned columns_number, - unsigned tuples_number, double c_sample = 1, - double r_sample = 1); + size_t columns_number, size_t tuples_number, + double c_sample = 1, double r_sample = 1); CFDRelationData(std::unique_ptr schema, std::vector column_data, std::vector data, diff --git a/src/core/algorithms/fd/aidfd/aid.cpp b/src/core/algorithms/fd/aidfd/aid.cpp index 7a4495fe7d..1c0d2b4a0f 100644 --- a/src/core/algorithms/fd/aidfd/aid.cpp +++ b/src/core/algorithms/fd/aidfd/aid.cpp @@ -19,12 +19,7 @@ void Aid::LoadDataInternal() { throw std::runtime_error("Unable to work on an empty dataset."); } - schema_ = std::make_unique(input_table_->GetRelationName()); - - for (size_t i = 0; i < number_of_attributes_; ++i) { - std::string const& column_name = input_table_->GetColumnName(static_cast(i)); - schema_->AppendColumn(column_name); - } + schema_ = RelationalSchema::CreateFrom(*input_table_); while (input_table_->HasNextRow()) { std::vector const& next_line = input_table_->GetNextRow(); @@ -160,7 +155,7 @@ void Aid::HandleConstantColumns(boost::dynamic_bitset<>& attributes) { attr_num != boost::dynamic_bitset<>::npos; attr_num = constant_columns_.find_next(attr_num)) { attributes[attr_num] = false; - Column rhs = *schema_->GetColumn(attr_num); + Column rhs = schema_->GetColumn(attr_num); RegisterFd(lhs, rhs); } } @@ -243,7 +238,7 @@ void Aid::InvertNegativeCover() { void Aid::RegisterFDs(size_t rhs_attribute, std::vector> const& list_of_lhs_attributes) { - Column rhs = *schema_->GetColumn(rhs_attribute); + Column rhs = schema_->GetColumn(rhs_attribute); for (auto const& lhs_attributes : list_of_lhs_attributes) { Vertical lhs = schema_->GetVertical(lhs_attributes); RegisterFd(lhs, rhs); diff --git a/src/core/algorithms/fd/depminer/depminer.cpp b/src/core/algorithms/fd/depminer/depminer.cpp index 1454f6b3b5..7a3269d20e 100644 --- a/src/core/algorithms/fd/depminer/depminer.cpp +++ b/src/core/algorithms/fd/depminer/depminer.cpp @@ -58,11 +58,11 @@ std::vector Depminer::GenerateCmaxSets(std::unordered_set con std::vector c_max_cets; for (auto const& column : this->schema_->GetColumns()) { - CMAXSet result(*column); + CMAXSet result(column); // finding all sets, which doesn't contain column for (auto const& ag : agree_sets) { - if (!ag.Contains(*column)) { + if (!ag.Contains(column)) { result.AddCombination(ag); } } @@ -110,16 +110,15 @@ std::vector Depminer::GenerateCmaxSets(std::unordered_set con return c_max_cets; } -void Depminer::LhsForColumn(std::unique_ptr const& column, - std::vector const& c_max_cets) { +void Depminer::LhsForColumn(Column const& column, std::vector const& c_max_cets) { std::unordered_set level; // 3 - CMAXSet correct = GenFirstLevel(c_max_cets, *column, level); + CMAXSet correct = GenFirstLevel(c_max_cets, column, level); - auto const pli = relation_->GetColumnData(column->GetIndex()).GetPositionListIndex(); + auto const pli = relation_->GetColumnData(column.GetIndex()).GetPositionListIndex(); bool column_contains_only_equal_values = pli->IsConstant(); if (column_contains_only_equal_values) { - RegisterFd(Vertical(), *column); + RegisterFd(Vertical(), column); return; } @@ -137,8 +136,8 @@ void Depminer::LhsForColumn(std::unique_ptr const& column, } // 6 if (is_fd) { - if (!l.Contains(*column)) { - this->RegisterFd(l, *column); + if (!l.Contains(column)) { + this->RegisterFd(l, column); } level_copy.erase(l); } diff --git a/src/core/algorithms/fd/depminer/depminer.h b/src/core/algorithms/fd/depminer/depminer.h index d60ec3cf48..e693170413 100644 --- a/src/core/algorithms/fd/depminer/depminer.h +++ b/src/core/algorithms/fd/depminer/depminer.h @@ -13,7 +13,7 @@ class Depminer : public PliBasedFDAlgorithm { std::unordered_set const& prev_level); static bool CheckJoin(Vertical const& _p, Vertical const& _q); - void LhsForColumn(std::unique_ptr const& column, std::vector const& cmax_sets); + void LhsForColumn(Column const& column, std::vector const& cmax_sets); std::vector GenerateCmaxSets(std::unordered_set const& agree_sets); double progress_step_ = 0; diff --git a/src/core/algorithms/fd/dfd/dfd.cpp b/src/core/algorithms/fd/dfd/dfd.cpp index f9fd83abb4..06f61c71e1 100644 --- a/src/core/algorithms/fd/dfd/dfd.cpp +++ b/src/core/algorithms/fd/dfd/dfd.cpp @@ -38,11 +38,11 @@ unsigned long long DFD::ExecuteInternal() { // search for unique columns for (auto const& column : schema->GetColumns()) { - ColumnData& column_data = relation_->GetColumnData(column->GetIndex()); + ColumnData& column_data = relation_->GetColumnData(column.GetIndex()); model::PositionListIndex const* const column_pli = column_data.GetPositionListIndex(); if (column_pli->AllValuesAreUnique()) { - Vertical const lhs = Vertical(*column); + Vertical const lhs = Vertical(column); unique_columns_.push_back(lhs); // we do not register an FD at once, because we check for FDs with empty LHS later } @@ -51,10 +51,10 @@ unsigned long long DFD::ExecuteInternal() { double progress_step = 100.0 / schema->GetNumColumns(); boost::asio::thread_pool search_space_pool(number_of_threads_); - for (auto& rhs : schema->GetColumns()) { + for (auto const& rhs : schema->GetColumns()) { boost::asio::post( search_space_pool, [this, &rhs, schema, progress_step, &partition_storage]() { - ColumnData const& rhs_data = relation_->GetColumnData(rhs->GetIndex()); + ColumnData const& rhs_data = relation_->GetColumnData(rhs.GetIndex()); model::PositionListIndex const* const rhs_pli = rhs_data.GetPositionListIndex(); /* if all the rows have the same value, then we register FD with empty LHS @@ -62,17 +62,17 @@ unsigned long long DFD::ExecuteInternal() { * this RHS, so we register it and move to the next RHS * */ if (rhs_pli->GetNepAsLong() == relation_->GetNumTuplePairs()) { - RegisterFd(*(schema->empty_vertical_), *rhs); + RegisterFd(*(schema->empty_vertical_), rhs); AddProgress(progress_step); return; } - auto search_space = LatticeTraversal(rhs.get(), relation_.get(), - unique_columns_, partition_storage.get()); + auto search_space = LatticeTraversal(&rhs, relation_.get(), unique_columns_, + partition_storage.get()); auto const minimal_deps = search_space.FindLHSs(); for (auto const& minimal_dependency_lhs : minimal_deps) { - RegisterFd(minimal_dependency_lhs, *rhs); + RegisterFd(minimal_dependency_lhs, rhs); } AddProgress(progress_step); LOG(INFO) << static_cast(GetProgress().second); diff --git a/src/core/algorithms/fd/dfd/lattice_observations/lattice_observations.cpp b/src/core/algorithms/fd/dfd/lattice_observations/lattice_observations.cpp index 11fee8346a..7ddbe0f2be 100644 --- a/src/core/algorithms/fd/dfd/lattice_observations/lattice_observations.cpp +++ b/src/core/algorithms/fd/dfd/lattice_observations/lattice_observations.cpp @@ -49,7 +49,7 @@ NodeCategory LatticeObservations::UpdateNonDependencyCategory(Vertical const& no for (size_t index = column_indices.find_first(); index < column_indices.size(); index = column_indices.find_next(index)) { - auto const superset_node_iter = this->find(node.Union(*node.GetSchema()->GetColumn(index))); + auto const superset_node_iter = this->find(node.Union(node.GetSchema()->GetColumn(index))); if (superset_node_iter == this->end()) { // if we found unchecked superset of this node diff --git a/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.cpp b/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.cpp index 73015d6098..b7677caf7a 100644 --- a/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.cpp +++ b/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.cpp @@ -37,7 +37,7 @@ std::unordered_set LatticeTraversal::FindLHSs() { for (unsigned partition_index : column_order_.GetOrderHighDistinctCount(Vertical(*rhs_).Invert())) { if (partition_index != rhs_->GetIndex()) { - seeds.push(Vertical(*schema->GetColumn(partition_index))); + seeds.push(Vertical(schema->GetColumn(partition_index))); } } diff --git a/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.h b/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.h index 6c5cff1a24..dad72298ed 100644 --- a/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.h +++ b/src/core/algorithms/fd/dfd/lattice_traversal/lattice_traversal.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include "../column_order/column_order.h" #include "../lattice_observations/lattice_observations.h" diff --git a/src/core/algorithms/fd/dfd/partition_storage/partition_storage.cpp b/src/core/algorithms/fd/dfd/partition_storage/partition_storage.cpp index 9b0192c38f..f4c31b6b54 100644 --- a/src/core/algorithms/fd/dfd/partition_storage/partition_storage.cpp +++ b/src/core/algorithms/fd/dfd/partition_storage/partition_storage.cpp @@ -18,9 +18,9 @@ PartitionStorage::PartitionStorage(ColumnLayoutRelationData* relation_data, relation_data->GetSchema())), caching_method_(caching_method), eviction_method_(eviction_method) { - for (auto& column_ptr : relation_data->GetSchema()->GetColumns()) { - index_->Put(static_cast(*column_ptr), - relation_data->GetColumnData(column_ptr->GetIndex()).GetPliOwnership()); + for (auto const& column : relation_data->GetSchema()->GetColumns()) { + index_->Put(static_cast(column), + relation_data->GetColumnData(column.GetIndex()).GetPliOwnership()); } } diff --git a/src/core/algorithms/fd/dfd/pruning_maps/pruning_map.cpp b/src/core/algorithms/fd/dfd/pruning_maps/pruning_map.cpp index f3d76193d4..9020a4dc1a 100644 --- a/src/core/algorithms/fd/dfd/pruning_maps/pruning_map.cpp +++ b/src/core/algorithms/fd/dfd/pruning_maps/pruning_map.cpp @@ -2,7 +2,7 @@ PruningMap::PruningMap(RelationalSchema const* schema) { for (auto const& column : schema->GetColumns()) { - this->insert(std::make_pair(Vertical(*column), std::unordered_set())); + this->insert(std::make_pair(Vertical(column), std::unordered_set())); } } @@ -32,7 +32,7 @@ void PruningMap::RebalanceGroup(Vertical const& key) { for (size_t column_index = inverted_columns.find_first(); column_index < inverted_columns.size(); column_index = inverted_columns.find_next(column_index)) { - Vertical new_key = key.Union(*key.GetSchema()->GetColumn(column_index)); + Vertical new_key = key.Union(key.GetSchema()->GetColumn(column_index)); std::unordered_set new_group; for (auto const& dep_of_group : deps_of_group) { diff --git a/src/core/algorithms/fd/fastfds/fastfds.cpp b/src/core/algorithms/fd/fastfds/fastfds.cpp index 4c6df4db9d..fba9b3e60b 100644 --- a/src/core/algorithms/fd/fastfds/fastfds.cpp +++ b/src/core/algorithms/fd/fastfds/fastfds.cpp @@ -56,19 +56,19 @@ unsigned long long FastFDs::ExecuteInternal() { return elapsed_milliseconds.count(); } - auto task = [this](std::unique_ptr const& column) { - if (ColumnContainsOnlyEqualValues(*column)) { + auto task = [this](Column const& column) { + if (ColumnContainsOnlyEqualValues(column)) { LOG(DEBUG) << "Registered FD: " << schema_->empty_vertical_->ToString() << "->" - << column->ToString(); - RegisterFd(Vertical(), *column); + << column.ToString(); + RegisterFd(Vertical(), column); return; } - vector diff_sets_mod = GetDiffSetsMod(*column); + vector diff_sets_mod = GetDiffSetsMod(column); assert(!diff_sets_mod.empty()); if (!(diff_sets_mod.size() == 1 && diff_sets_mod.back() == *schema_->empty_vertical_)) { - set init_ordering = GetInitOrdering(diff_sets_mod, *column); - FindCovers(*column, diff_sets_mod, diff_sets_mod, *schema_->empty_vertical_, + set init_ordering = GetInitOrdering(diff_sets_mod, column); + FindCovers(column, diff_sets_mod, diff_sets_mod, *schema_->empty_vertical_, init_ordering); } else { AddProgress(percent_per_col_); @@ -78,13 +78,13 @@ unsigned long long FastFDs::ExecuteInternal() { if (threads_num_ > 1) { boost::asio::thread_pool pool(threads_num_); - for (std::unique_ptr const& column : schema_->GetColumns()) { + for (Column const& column : schema_->GetColumns()) { boost::asio::post(pool, [&column, task]() { return task(column); }); } pool.join(); } else { - for (std::unique_ptr const& column : schema_->GetColumns()) { + for (Column const& column : schema_->GetColumns()) { task(column); } } @@ -195,8 +195,8 @@ set FastFDs::GetInitOrdering(vector ordering(ordering_comp); for (auto const& col : schema_->GetColumns()) { - if (*col != attribute) { - ordering.insert(*col); + if (col != attribute) { + ordering.insert(col); } } diff --git a/src/core/algorithms/fd/fd_algorithm.cpp b/src/core/algorithms/fd/fd_algorithm.cpp index 420dbd4f01..e9f3d56587 100644 --- a/src/core/algorithms/fd/fd_algorithm.cpp +++ b/src/core/algorithms/fd/fd_algorithm.cpp @@ -5,6 +5,7 @@ #include #include "config/max_lhs/option.h" +#include "model/table/relational_schema.h" namespace algos { diff --git a/src/core/algorithms/fd/fd_mine/fd_mine.cpp b/src/core/algorithms/fd/fd_mine/fd_mine.cpp index 9bc202ad21..95b2fd82be 100644 --- a/src/core/algorithms/fd/fd_mine/fd_mine.cpp +++ b/src/core/algorithms/fd/fd_mine/fd_mine.cpp @@ -276,8 +276,8 @@ void FdMine::Display() { } Vertical lhs_vertical(schema_, lhs); LOG(DEBUG) << "Discovered FD: " << lhs_vertical.ToString() << " -> " - << schema_->GetColumn(j)->GetName(); - RegisterFd(std::move(lhs_vertical), *schema_->GetColumn(j)); + << schema_->GetColumn(j).GetName(); + RegisterFd(std::move(lhs_vertical), schema_->GetColumn(j)); fd_counter++; } } diff --git a/src/core/algorithms/fd/fdep/fd_tree_element.cpp b/src/core/algorithms/fd/fdep/fd_tree_element.cpp index b40780ec34..e87c9eadbc 100644 --- a/src/core/algorithms/fd/fdep/fd_tree_element.cpp +++ b/src/core/algorithms/fd/fdep/fd_tree_element.cpp @@ -263,7 +263,7 @@ void FDTreeElement::TransformTreeFdCollection(std::bitset& active_p lhs_bitset.set(i - 1); } Vertical lhs(&scheme, lhs_bitset); - Column rhs(&scheme, scheme.GetColumn(attr - 1)->GetName(), attr - 1); + Column rhs(&scheme, scheme.GetColumn(attr - 1).GetName(), attr - 1); fd_collection.emplace_back(FD{lhs, rhs}); } } diff --git a/src/core/algorithms/fd/fdep/fdep.cpp b/src/core/algorithms/fd/fdep/fdep.cpp index 1bd698d3c6..b89f9a92d6 100644 --- a/src/core/algorithms/fd/fdep/fdep.cpp +++ b/src/core/algorithms/fd/fdep/fdep.cpp @@ -28,13 +28,12 @@ void FDep::LoadDataInternal() { } column_names_.resize(number_attributes_); - schema_ = std::make_unique(input_table_->GetRelationName()); - for (size_t i = 0; i < number_attributes_; ++i) { column_names_[i] = input_table_->GetColumnName(static_cast(i)); - schema_->AppendColumn(column_names_[i]); } + schema_ = RelationalSchema::CreateFrom(*input_table_); + std::vector next_line; while (input_table_->HasNextRow()) { next_line = input_table_->GetNextRow(); diff --git a/src/core/algorithms/fd/fun/fun.cpp b/src/core/algorithms/fd/fun/fun.cpp index d28daec375..6c1d14fc72 100644 --- a/src/core/algorithms/fd/fun/fun.cpp +++ b/src/core/algorithms/fd/fun/fun.cpp @@ -161,16 +161,16 @@ unsigned long long FUN::ExecuteInternal() { r_prime_ = empty_vertical; Level l_k_minus_1{FunQuadruple(empty_vertical)}; Level l_k; - for (std::unique_ptr const& a : schema_->GetColumns()) { - FunQuadruple attribute(*a); + for (Column const& a : schema_->GetColumns()) { + FunQuadruple attribute(a); attribute.SetCount(Count(attribute.GetCandidate())); l_k.push_back(attribute); - r_ = r_.Union(*a); + r_ = r_.Union(a); if (!IsKey(attribute)) { - r_prime_ = r_prime_.Union(*a); + r_prime_ = r_prime_.Union(a); } if (attribute.GetCount() == 1) { - fds_.emplace(*a, std::set{empty_vertical}); + fds_.emplace(a, std::set{empty_vertical}); } } diff --git a/src/core/algorithms/fd/hyfd/hyfd.cpp b/src/core/algorithms/fd/hyfd/hyfd.cpp index ce4adf07d7..fa740433cb 100644 --- a/src/core/algorithms/fd/hyfd/hyfd.cpp +++ b/src/core/algorithms/fd/hyfd/hyfd.cpp @@ -71,7 +71,7 @@ void HyFD::RegisterFDs(std::vector&& fds, std::vector cons Vertical lhs_v(schema, std::move(mapped_lhs)); auto const mapped_rhs = og_mapping[rhs]; - Column rhs_c(schema, schema->GetColumn(mapped_rhs)->GetName(), mapped_rhs); + Column rhs_c(schema, schema->GetColumn(mapped_rhs).GetName(), mapped_rhs); RegisterFd(std::move(lhs_v), std::move(rhs_c)); } diff --git a/src/core/algorithms/fd/hyfd/validator.cpp b/src/core/algorithms/fd/hyfd/validator.cpp index 54fbf23a4d..47e44264ff 100644 --- a/src/core/algorithms/fd/hyfd/validator.cpp +++ b/src/core/algorithms/fd/hyfd/validator.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -12,6 +13,7 @@ #include "algorithms/fd/hycommon/util/pli_util.h" #include "algorithms/fd/hycommon/validator_helpers.h" #include "hyfd_config.h" +#include "util/bitset_utils.h" namespace { diff --git a/src/core/algorithms/fd/pfdtane/pfdtane.cpp b/src/core/algorithms/fd/pfdtane/pfdtane.cpp index 32e8727bed..b443b9dafa 100644 --- a/src/core/algorithms/fd/pfdtane/pfdtane.cpp +++ b/src/core/algorithms/fd/pfdtane/pfdtane.cpp @@ -88,11 +88,10 @@ double PFDTane::CalculateUccError(model::PositionListIndex const* pli, return pli->GetNepAsLong() / static_cast(relation_data->GetNumTuplePairs()); } -void PFDTane::RegisterAndCountFd(Vertical const& lhs, Column const* rhs, - [[maybe_unused]] config::ErrorType error, - [[maybe_unused]] RelationalSchema const* schema) { +void PFDTane::RegisterAndCountFd(Vertical const& lhs, Column const& rhs, config::ErrorType, + RelationalSchema const*) { dynamic_bitset<> lhs_bitset = lhs.GetColumnIndices(); - PliBasedFDAlgorithm::RegisterFd(lhs, *rhs); + PliBasedFDAlgorithm::RegisterFd(lhs, rhs); } void PFDTane::Prune(model::LatticeLevel* level) { @@ -111,7 +110,7 @@ void PFDTane::Prune(model::LatticeLevel* level) { for (std::size_t rhs_index = vertex->GetRhsCandidates().find_first(); rhs_index != boost::dynamic_bitset<>::npos; rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) { - Vertical rhs = static_cast(*schema->GetColumn((int)rhs_index)); + Vertical rhs = static_cast(schema->GetColumn(rhs_index)); if (!columns.Contains(rhs)) { bool is_rhs_candidate = true; for (auto const& column : columns.GetColumns()) { @@ -186,10 +185,10 @@ void PFDTane::ComputeDependencies(model::LatticeLevel* level) { // Check X -> A config::ErrorType error = CalculateFdError(x_pli, xa_pli, error_measure_); if (error <= max_fd_error_) { - Column const* rhs = schema->GetColumns()[a_index].get(); + Column const& rhs = schema->GetColumns()[a_index]; RegisterAndCountFd(lhs, rhs, error, schema); - xa_vertex->GetRhsCandidates().set(rhs->GetIndex(), false); + xa_vertex->GetRhsCandidates().set(rhs.GetIndex(), false); if (error == 0) { xa_vertex->GetRhsCandidates() &= lhs.GetColumnIndices(); } @@ -208,11 +207,10 @@ unsigned long long PFDTane::ExecuteInternal() { << relation_->GetMaximumNip() << "."; for (auto& column : schema->GetColumns()) { - double avg_partners = relation_->GetColumnData(column->GetIndex()) - .GetPositionListIndex() - ->GetNepAsLong() * - 2.0 / relation_->GetNumRows(); - LOG(DEBUG) << "* " << column->ToString() << ": every tuple has " << std::setw(2) + double avg_partners = + relation_->GetColumnData(column.GetIndex()).GetPositionListIndex()->GetNepAsLong() * + 2.0 / relation_->GetNumRows(); + LOG(DEBUG) << "* " << column.ToString() << ": every tuple has " << std::setw(2) << avg_partners << " partners on average."; } auto start_time = std::chrono::system_clock::now(); @@ -232,8 +230,8 @@ unsigned long long PFDTane::ExecuteInternal() { auto level1 = std::make_unique(1); for (auto& column : schema->GetColumns()) { // for each attribute set vertex - ColumnData const& column_data = relation_->GetColumnData(column->GetIndex()); - auto vertex = std::make_unique(static_cast(*column)); + ColumnData const& column_data = relation_->GetColumnData(column.GetIndex()); + auto vertex = std::make_unique(static_cast(column)); vertex->AddRhsCandidates(schema->GetColumns()); vertex->GetParents().push_back(empty_vertex); @@ -243,10 +241,10 @@ unsigned long long PFDTane::ExecuteInternal() { // check FDs: 0->A double fd_error = CalculateZeroAryFdError(&column_data); if (fd_error <= max_fd_error_) { // TODO: max_error - zeroary_fd_rhs.set(column->GetIndex()); - RegisterAndCountFd(*schema->empty_vertical_, column.get(), fd_error, schema); + zeroary_fd_rhs.set(column.GetIndex()); + RegisterAndCountFd(*schema->empty_vertical_, column, fd_error, schema); - vertex->GetRhsCandidates().set(column->GetIndex(), false); + vertex->GetRhsCandidates().set(column.GetIndex(), false); if (fd_error == 0) { vertex->GetRhsCandidates().reset(); } diff --git a/src/core/algorithms/fd/pfdtane/pfdtane.h b/src/core/algorithms/fd/pfdtane/pfdtane.h index b80951639e..a92c6fe04f 100644 --- a/src/core/algorithms/fd/pfdtane/pfdtane.h +++ b/src/core/algorithms/fd/pfdtane/pfdtane.h @@ -29,7 +29,7 @@ class PFDTane : public PliBasedFDAlgorithm { static double CalculateUccError(model::PositionListIndex const* pli, ColumnLayoutRelationData const* relation_data); - void RegisterAndCountFd(Vertical const& lhs, Column const* rhs, double error, + void RegisterAndCountFd(Vertical const& lhs, Column const& rhs, double error, RelationalSchema const* schema); static config::ErrorType CalculateZeroAryFdError(ColumnData const* rhs); static config::ErrorType CalculateFdError(model::PositionListIndex const* x_pli, diff --git a/src/core/algorithms/fd/pyro/pyro.cpp b/src/core/algorithms/fd/pyro/pyro.cpp index b14467dcb0..4c6e89f6c8 100644 --- a/src/core/algorithms/fd/pyro/pyro.cpp +++ b/src/core/algorithms/fd/pyro/pyro.cpp @@ -63,10 +63,10 @@ unsigned long long Pyro::ExecuteInternal() { } int next_id = 0; - for (auto& rhs : schema->GetColumns()) { + for (auto const& rhs : schema->GetColumns()) { std::unique_ptr strategy; if (parameters_.ucc_error_measure == "g1prime") { - strategy = std::make_unique(rhs.get(), parameters_.max_ucc_error, + strategy = std::make_unique(&rhs, parameters_.max_ucc_error, parameters_.error_dev); } else { throw std::runtime_error("Unknown key error measure."); diff --git a/src/core/algorithms/fd/pyrocommon/core/key_g1_strategy.cpp b/src/core/algorithms/fd/pyrocommon/core/key_g1_strategy.cpp index a2537b9eda..a9d6cd28fa 100644 --- a/src/core/algorithms/fd/pyrocommon/core/key_g1_strategy.cpp +++ b/src/core/algorithms/fd/pyrocommon/core/key_g1_strategy.cpp @@ -20,10 +20,10 @@ double KeyG1Strategy::CalculateKeyError(double num_violating_tuple_pairs) const void KeyG1Strategy::EnsureInitialized(SearchSpace* search_space) const { if (search_space->is_initialized_) return; - for (auto& column : context_->GetSchema()->GetColumns()) { - if (IsIrrelevantColumn(column->GetIndex())) continue; + for (auto const& column : context_->GetSchema()->GetColumns()) { + if (IsIrrelevantColumn(column.GetIndex())) continue; - search_space->AddLaunchPad(CreateDependencyCandidate(static_cast(*column))); + search_space->AddLaunchPad(CreateDependencyCandidate(static_cast(column))); } search_space->is_initialized_ = true; diff --git a/src/core/algorithms/fd/pyrocommon/core/profiling_context.cpp b/src/core/algorithms/fd/pyrocommon/core/profiling_context.cpp index ef63d53886..1a36eaca4b 100644 --- a/src/core/algorithms/fd/pyrocommon/core/profiling_context.cpp +++ b/src/core/algorithms/fd/pyrocommon/core/profiling_context.cpp @@ -33,10 +33,10 @@ ProfilingContext::ProfilingContext(algos::pyro::Parameters parameters, std::make_unique>(schema); // TODO: сделать, чтобы при одном потоке agree_set_samples_ = // std::make_unique>(schema); - for (auto& column : schema->GetColumns()) { + for (auto const& column : schema->GetColumns()) { CreateColumnFocusedSample( - static_cast(*column), - relation_data->GetColumnData(column->GetIndex()).GetPositionListIndex(), 1); + static_cast(column), + relation_data->GetColumnData(column.GetIndex()).GetPositionListIndex(), 1); } } else { agree_set_samples_ = nullptr; diff --git a/src/core/algorithms/fd/pyrocommon/core/search_space.cpp b/src/core/algorithms/fd/pyrocommon/core/search_space.cpp index 79fead7e99..7652cadc1c 100644 --- a/src/core/algorithms/fd/pyrocommon/core/search_space.cpp +++ b/src/core/algorithms/fd/pyrocommon/core/search_space.cpp @@ -219,13 +219,13 @@ bool SearchSpace::Ascend(DependencyCandidate const& launch_pad) { boost::optional next_candidate; int num_seen_elements = is_ascend_randomly_ ? 1 : -1; - for (auto& extension_column : context_->GetSchema()->GetColumns()) { - if (traversal_candidate.vertical_.GetColumnIndices()[extension_column->GetIndex()] || - strategy_->IsIrrelevantColumn(*extension_column)) { + for (auto const& extension_column : context_->GetSchema()->GetColumns()) { + if (traversal_candidate.vertical_.GetColumnIndices()[extension_column.GetIndex()] || + strategy_->IsIrrelevantColumn(extension_column)) { continue; } auto extended_vertical = - traversal_candidate.vertical_.Union(static_cast(*extension_column)); + traversal_candidate.vertical_.Union(static_cast(extension_column)); if (scope_ != nullptr && scope_->GetSupersetEntries(extended_vertical).empty()) { continue; diff --git a/src/core/algorithms/fd/pyrocommon/model/pli_cache.cpp b/src/core/algorithms/fd/pyrocommon/model/pli_cache.cpp index 2365f5a7ec..fd7535403b 100644 --- a/src/core/algorithms/fd/pyrocommon/model/pli_cache.cpp +++ b/src/core/algorithms/fd/pyrocommon/model/pli_cache.cpp @@ -29,17 +29,15 @@ PLICache::PLICache(ColumnLayoutRelationData* relation_data, CachingMethod cachin median_entropy_(median_entropy), median_gini_(median_gini), median_inverted_entropy_(median_inverted_entropy) { - for (auto& column_ptr : relation_data->GetSchema()->GetColumns()) { - index_->Put(static_cast(*column_ptr), - relation_data->GetColumnData(column_ptr->GetIndex()).GetPliOwnership()); + for (auto& column : relation_data->GetSchema()->GetColumns()) { + index_->Put(static_cast(column), + relation_data->GetColumnData(column.GetIndex()).GetPliOwnership()); } } PLICache::~PLICache() { - for (auto& column_ptr : relation_data_->GetSchema()->GetColumns()) { - // auto PLI = - index_->Remove(static_cast(*column_ptr)); - // relation_data_->GetColumnData(column_ptr->getIndex()).getPLI(std::move(PLI)); + for (auto const& column : relation_data_->GetSchema()->GetColumns()) { + index_->Remove(static_cast(column)); } } diff --git a/src/core/algorithms/fd/tane/lattice_vertex.cpp b/src/core/algorithms/fd/tane/lattice_vertex.cpp index d4b782ef72..b1986b2e9f 100644 --- a/src/core/algorithms/fd/tane/lattice_vertex.cpp +++ b/src/core/algorithms/fd/tane/lattice_vertex.cpp @@ -4,9 +4,9 @@ namespace model { using boost::dynamic_bitset, std::vector, std::shared_ptr, std::make_shared, std::string; -void LatticeVertex::AddRhsCandidates(vector> const& candidates) { - for (auto& cand_ptr : candidates) { - rhs_candidates_.set(cand_ptr->GetIndex()); +void LatticeVertex::AddRhsCandidates(vector const& candidates) { + for (auto& cand : candidates) { + rhs_candidates_.set(cand.GetIndex()); } } diff --git a/src/core/algorithms/fd/tane/lattice_vertex.h b/src/core/algorithms/fd/tane/lattice_vertex.h index 43c20412d6..d70b8c1332 100644 --- a/src/core/algorithms/fd/tane/lattice_vertex.h +++ b/src/core/algorithms/fd/tane/lattice_vertex.h @@ -43,7 +43,7 @@ class LatticeVertex { return rhs_candidates_; } - void AddRhsCandidates(std::vector> const& candidates); + void AddRhsCandidates(std::vector const& candidates); bool ComesBeforeAndSharePrefixWith(LatticeVertex const& that) const; diff --git a/src/core/algorithms/fd/tane/tane.cpp b/src/core/algorithms/fd/tane/tane.cpp index 0316b1149e..f11e2a597b 100644 --- a/src/core/algorithms/fd/tane/tane.cpp +++ b/src/core/algorithms/fd/tane/tane.cpp @@ -56,10 +56,10 @@ double Tane::CalculateUccError(model::PositionListIndex const* pli, return pli->GetNepAsLong() / static_cast(relation_data->GetNumTuplePairs()); } -void Tane::RegisterAndCountFd(Vertical const& lhs, Column const* rhs, [[maybe_unused]] double error, +void Tane::RegisterAndCountFd(Vertical const& lhs, Column const& rhs, [[maybe_unused]] double error, [[maybe_unused]] RelationalSchema const* schema) { dynamic_bitset<> lhs_bitset = lhs.GetColumnIndices(); - PliBasedFDAlgorithm::RegisterFd(lhs, *rhs); + PliBasedFDAlgorithm::RegisterFd(lhs, rhs); count_of_fd_++; } @@ -82,12 +82,11 @@ unsigned long long Tane::ExecuteInternal() { << relation_->GetNumRows() << " rows, and a maximum NIP of " << std::setw(2) << relation_->GetMaximumNip() << "."; - for (auto& column : schema->GetColumns()) { - double avg_partners = relation_->GetColumnData(column->GetIndex()) - .GetPositionListIndex() - ->GetNepAsLong() * - 2.0 / relation_->GetNumRows(); - LOG(INFO) << "* " << column->ToString() << ": every tuple has " << std::setw(2) + for (auto const& column : schema->GetColumns()) { + double avg_partners = + relation_->GetColumnData(column.GetIndex()).GetPositionListIndex()->GetNepAsLong() * + 2.0 / relation_->GetNumRows(); + LOG(INFO) << "* " << column.ToString() << ": every tuple has " << std::setw(2) << avg_partners << " partners on average."; } auto start_time = std::chrono::system_clock::now(); @@ -105,10 +104,10 @@ unsigned long long Tane::ExecuteInternal() { // Initialize level1 dynamic_bitset<> zeroary_fd_rhs(schema->GetNumColumns()); auto level1 = std::make_unique(1); - for (auto& column : schema->GetColumns()) { + for (auto const& column : schema->GetColumns()) { // for each attribute set vertex - ColumnData const& column_data = relation_->GetColumnData(column->GetIndex()); - auto vertex = std::make_unique(static_cast(*column)); + ColumnData const& column_data = relation_->GetColumnData(column.GetIndex()); + auto vertex = std::make_unique(static_cast(column)); vertex->AddRhsCandidates(schema->GetColumns()); vertex->GetParents().push_back(empty_vertex); @@ -118,10 +117,10 @@ unsigned long long Tane::ExecuteInternal() { // check FDs: 0->A double fd_error = CalculateZeroAryFdError(&column_data, relation_.get()); if (fd_error <= max_fd_error_) { // TODO: max_error - zeroary_fd_rhs.set(column->GetIndex()); - RegisterAndCountFd(*schema->empty_vertical_, column.get(), fd_error, schema); + zeroary_fd_rhs.set(column.GetIndex()); + RegisterAndCountFd(*schema->empty_vertical_, column, fd_error, schema); - vertex->GetRhsCandidates().set(column->GetIndex(), false); + vertex->GetRhsCandidates().set(column.GetIndex(), false); if (fd_error == 0) { vertex->GetRhsCandidates().reset(); } @@ -213,11 +212,11 @@ unsigned long long Tane::ExecuteInternal() { double error = CalculateFdError(x_vertex->GetPositionListIndex(), xa_vertex->GetPositionListIndex(), relation_.get()); if (error <= max_fd_error_) { - Column const* rhs = schema->GetColumns()[a_index].get(); + Column const& rhs = schema->GetColumns()[a_index]; // TODO: register FD to a file or something RegisterAndCountFd(lhs, rhs, error, schema); - xa_vertex->GetRhsCandidates().set(rhs->GetIndex(), false); + xa_vertex->GetRhsCandidates().set(rhs.GetIndex(), false); if (error == 0) { xa_vertex->GetRhsCandidates() &= lhs.GetColumnIndices(); } @@ -248,8 +247,7 @@ unsigned long long Tane::ExecuteInternal() { for (size_t rhs_index = vertex->GetRhsCandidates().find_first(); rhs_index != boost::dynamic_bitset<>::npos; rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) { - Vertical rhs = - static_cast(*schema->GetColumn((int)rhs_index)); + auto rhs = static_cast(schema->GetColumn(rhs_index)); if (!columns.Contains(rhs)) { bool is_rhs_candidate = true; for (auto const& column : columns.GetColumns()) { diff --git a/src/core/algorithms/fd/tane/tane.h b/src/core/algorithms/fd/tane/tane.h index 4de4cf05c4..7212d6c17d 100644 --- a/src/core/algorithms/fd/tane/tane.h +++ b/src/core/algorithms/fd/tane/tane.h @@ -37,7 +37,7 @@ class Tane : public PliBasedFDAlgorithm { // static double round(double error) { return ((int)(error * 32768) + 1)/ 32768.0; } - void RegisterAndCountFd(Vertical const& lhs, Column const* rhs, double error, + void RegisterAndCountFd(Vertical const& lhs, Column const& rhs, double error, RelationalSchema const* schema); // void RegisterFd(Vertical const* lhs, Column const* rhs, double error, RelationalSchema const* // schema); diff --git a/src/core/algorithms/ind/faida/preprocessing/abstract_column_store.cpp b/src/core/algorithms/ind/faida/preprocessing/abstract_column_store.cpp index f0b9bc41f2..766d0e4b25 100644 --- a/src/core/algorithms/ind/faida/preprocessing/abstract_column_store.cpp +++ b/src/core/algorithms/ind/faida/preprocessing/abstract_column_store.cpp @@ -14,12 +14,7 @@ void AbstractColumnStore::LoadData(std::string const& dataset_name, TableIndex t throw std::runtime_error("Got an empty file: IND mining is meaningless."); } - schema_ = std::make_unique(input_data.GetRelationName()); - for (ColumnIndex col_idx = 0; col_idx < num_columns; ++col_idx) { - auto column = Column(schema_.get(), input_data.GetColumnName(col_idx), col_idx); - schema_->AppendColumn(std::move(column)); - } - schema_->Init(); + schema_ = RelationalSchema::CreateFrom(input_data); column_properties_ = std::vector(input_data.GetNumberOfColumns(), ColumnProperty::kOrdinary); diff --git a/src/core/algorithms/ind/faida/preprocessing/hashed_column_store.cpp b/src/core/algorithms/ind/faida/preprocessing/hashed_column_store.cpp index c6b0e79885..573495970f 100644 --- a/src/core/algorithms/ind/faida/preprocessing/hashed_column_store.cpp +++ b/src/core/algorithms/ind/faida/preprocessing/hashed_column_store.cpp @@ -6,15 +6,15 @@ namespace algos::faida { std::filesystem::path HashedColumnStore::PrepareDirNext(std::filesystem::path dir, TableIndex table_idx) { - for (std::unique_ptr const& column : schema_->GetColumns()) { + for (Column const& column : schema_->GetColumns()) { std::string file_name; file_name += std::to_string(table_idx); file_name += "_"; - file_name += std::to_string(column->GetIndex()); + file_name += std::to_string(column.GetIndex()); file_name += ".bin"; std::filesystem::path column_file = dir / file_name; - column_files_[column->GetIndex()] = std::move(column_file); + column_files_[column.GetIndex()] = std::move(column_file); } return dir; } diff --git a/src/core/algorithms/ind/ind.cpp b/src/core/algorithms/ind/ind.cpp index a8e0a69749..b4e22aa758 100644 --- a/src/core/algorithms/ind/ind.cpp +++ b/src/core/algorithms/ind/ind.cpp @@ -37,7 +37,7 @@ std::string IND::ToLongString() const { if (it != indices.begin()) { ss << ", "; } - ss << schemas_->at(table_idx).GetColumn(*it)->GetName(); + ss << schemas_->at(table_idx).GetColumn(*it).GetName(); } ss << "])"; return ss.str(); diff --git a/src/core/algorithms/ind/ind_algorithm.cpp b/src/core/algorithms/ind/ind_algorithm.cpp index 2b75b8f5cd..e4cc48307f 100644 --- a/src/core/algorithms/ind/ind_algorithm.cpp +++ b/src/core/algorithms/ind/ind_algorithm.cpp @@ -14,10 +14,7 @@ INDAlgorithm::INDAlgorithm(std::vector phase_names) void INDAlgorithm::LoadDataInternal() { schemas_ = std::make_shared>(); for (auto const& input_table : input_tables_) { - auto& schema = schemas_->emplace_back(input_table->GetRelationName()); - for (size_t i{0}; i < input_table->GetNumberOfColumns(); ++i) { - schema.AppendColumn(input_table->GetColumnName(i)); - } + schemas_->emplace_back(input_table->GetRelationName(), input_table->GetColumnNames()); } LoadINDAlgorithmDataInternal(); diff --git a/src/core/algorithms/ucc/hyucc/validator.cpp b/src/core/algorithms/ucc/hyucc/validator.cpp index 902e002a87..191315526f 100644 --- a/src/core/algorithms/ucc/hyucc/validator.cpp +++ b/src/core/algorithms/ucc/hyucc/validator.cpp @@ -8,6 +8,7 @@ #include "fd/hycommon/efficiency_threshold.h" #include "fd/hycommon/validator_helpers.h" #include "ucc/hyucc/model/ucc_tree_vertex.h" +#include "util/bitset_utils.h" namespace { diff --git a/src/core/model/table/column.cpp b/src/core/model/table/column.cpp index 52f7e2c504..e7cec48b51 100644 --- a/src/core/model/table/column.cpp +++ b/src/core/model/table/column.cpp @@ -1,5 +1,6 @@ #include "column.h" +#include "model/table/relational_schema.h" #include "vertical.h" using namespace std; diff --git a/src/core/model/table/column.h b/src/core/model/table/column.h index 41edfeb144..03d7ca9cf4 100644 --- a/src/core/model/table/column.h +++ b/src/core/model/table/column.h @@ -6,7 +6,9 @@ #include #include "column_index.h" -#include "relational_schema.h" + +class RelationalSchema; +class Vertical; class Column { friend RelationalSchema; diff --git a/src/core/model/table/column_layout_relation_data.cpp b/src/core/model/table/column_layout_relation_data.cpp index dc948a3946..da3940d2e5 100644 --- a/src/core/model/table/column_layout_relation_data.cpp +++ b/src/core/model/table/column_layout_relation_data.cpp @@ -21,7 +21,7 @@ std::vector ColumnLayoutRelationData::GetTuple(int tuple_index) const { std::unique_ptr ColumnLayoutRelationData::CreateFrom( model::IDatasetStream& data_stream, bool is_null_eq_null) { - auto schema = std::make_unique(data_stream.GetRelationName()); + auto schema = RelationalSchema::CreateFrom(data_stream); std::unordered_map value_dictionary; int next_value_id = 1; int const null_value_id = kNullValueId; @@ -59,13 +59,9 @@ std::unique_ptr ColumnLayoutRelationData::CreateFrom( std::vector column_data; for (size_t i = 0; i < num_columns; ++i) { - auto column = Column(schema.get(), data_stream.GetColumnName(i), i); - schema->AppendColumn(std::move(column)); auto pli = model::PositionListIndex::CreateFor(column_vectors[i], is_null_eq_null); - column_data.emplace_back(schema->GetColumn(i), std::move(pli)); + column_data.emplace_back(&schema->GetColumn(i), std::move(pli)); } - schema->Init(); - return std::make_unique(std::move(schema), std::move(column_data)); } diff --git a/src/core/model/table/column_layout_typed_relation_data.cpp b/src/core/model/table/column_layout_typed_relation_data.cpp index 20f13403f1..ad1dc2c285 100644 --- a/src/core/model/table/column_layout_typed_relation_data.cpp +++ b/src/core/model/table/column_layout_typed_relation_data.cpp @@ -6,7 +6,7 @@ namespace model { std::unique_ptr ColumnLayoutTypedRelationData::CreateFrom( IDatasetStream& data_stream, bool is_null_eq_null) { - auto schema = std::make_unique(data_stream.GetRelationName()); + auto schema = RelationalSchema::CreateFrom(data_stream); size_t const num_columns = data_stream.GetNumberOfColumns(); std::vector> columns(num_columns); @@ -32,15 +32,11 @@ std::unique_ptr ColumnLayoutTypedRelationData::Cr std::vector column_data; for (size_t i = 0; i < num_columns; ++i) { - Column column(schema.get(), data_stream.GetColumnName(i), i); - schema->AppendColumn(std::move(column)); TypedColumnData typed_column_data = model::TypedColumnDataFactory::CreateFrom( - schema->GetColumn(i), std::move(columns[i]), is_null_eq_null); + &schema->GetColumn(i), std::move(columns[i]), is_null_eq_null); column_data.emplace_back(std::move(typed_column_data)); } - schema->Init(); - return std::make_unique(std::move(schema), std::move(column_data)); } diff --git a/src/core/model/table/idataset_stream.h b/src/core/model/table/idataset_stream.h index 85659466b1..2431f0cb14 100644 --- a/src/core/model/table/idataset_stream.h +++ b/src/core/model/table/idataset_stream.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -9,11 +10,21 @@ class IDatasetStream { public: virtual std::vector GetNextRow() = 0; [[nodiscard]] virtual bool HasNextRow() const = 0; - [[nodiscard]] virtual size_t GetNumberOfColumns() const = 0; - [[nodiscard]] virtual std::string GetColumnName(size_t index) const = 0; + [[nodiscard]] virtual std::size_t GetNumberOfColumns() const = 0; + [[nodiscard]] virtual std::string GetColumnName(std::size_t index) const = 0; [[nodiscard]] virtual std::string GetRelationName() const = 0; virtual void Reset() = 0; virtual ~IDatasetStream() = default; + + std::vector GetColumnNames() const { + std::vector column_names; + std::size_t const number_of_columns = GetNumberOfColumns(); + column_names.reserve(number_of_columns); + for (std::size_t i = 0; i < number_of_columns; ++i) { + column_names.push_back(GetColumnName(i)); + } + return column_names; + } }; } // namespace model diff --git a/src/core/model/table/relational_schema.cpp b/src/core/model/table/relational_schema.cpp index d47ab40947..d172e0060a 100644 --- a/src/core/model/table/relational_schema.cpp +++ b/src/core/model/table/relational_schema.cpp @@ -3,49 +3,51 @@ #include #include -#include "vertical.h" #include "vertical_map.h" -RelationalSchema::RelationalSchema(std::string name) - : columns_(), name_(std::move(name)), empty_vertical_() { - Init(); +namespace { +auto MakeColumns(RelationalSchema* schema, std::vector column_names) { + std::vector columns; + std::size_t const number_of_columns = column_names.size(); + columns.reserve(number_of_columns); + for (model::ColumnIndex i = 0; i < number_of_columns; ++i) { + columns.emplace_back(schema, column_names[i], i); + } + return columns; } - -void RelationalSchema::Init() { - empty_vertical_ = Vertical::EmptyVertical(this); +} // namespace + +RelationalSchema::RelationalSchema(std::string name, std::vector column_names) + : columns_(MakeColumns(this, std::move(column_names))), + name_(std::move(name)), + empty_vertical_(std::make_unique(this, boost::dynamic_bitset<>(columns_.size()))) {} + +std::unique_ptr RelationalSchema::CreateFrom(model::IDatasetStream& table) { + std::vector column_names; + std::size_t const number_of_columns = table.GetNumberOfColumns(); + column_names.reserve(number_of_columns); + for (model::ColumnIndex i = 0; i < number_of_columns; ++i) { + column_names.push_back(table.GetColumnName(i)); + } + return std::make_unique(table.GetRelationName(), std::move(column_names)); } -// TODO: В оригинале тут что-то непонятное + приходится пересоздавать empty_vertical_ -- тут -// должен быть unique_ptr, тк создаём в остальных случаях новую вершину и выдаём наружу с овнершипом Vertical RelationalSchema::GetVertical(boost::dynamic_bitset<> indices) const { - if (indices.empty()) return *Vertical::EmptyVertical(this); - - if (indices.count() == 1) { - return Vertical(this, std::move(indices)); - } return Vertical(this, std::move(indices)); } -Column const* RelationalSchema::GetColumn(std::string const& col_name) const { +Column const& RelationalSchema::GetColumn(std::string const& col_name) const { auto found_entry_iterator = std::find_if(columns_.begin(), columns_.end(), - [&col_name](auto& column) { return column->name_ == col_name; }); - if (found_entry_iterator != columns_.end()) return found_entry_iterator->get(); + [&col_name](auto& column) { return column.name_ == col_name; }); + if (found_entry_iterator != columns_.end()) return *found_entry_iterator; throw std::invalid_argument("Couldn't match column name \'" + col_name + "\' to any of the schema's column names"); } -Column const* RelationalSchema::GetColumn(size_t index) const { - return columns_.at(index).get(); -} - -void RelationalSchema::AppendColumn(std::string const& col_name) { - columns_.push_back(std::make_unique(this, col_name, columns_.size())); -} - -void RelationalSchema::AppendColumn(Column column) { - columns_.push_back(std::make_unique(std::move(column))); +Column const& RelationalSchema::GetColumn(size_t index) const { + return columns_.at(index); } size_t RelationalSchema::GetNumColumns() const { @@ -87,7 +89,7 @@ std::unordered_set RelationalSchema::CalculateHittingSet( corrective_column_index != boost::dynamic_bitset<>::npos; corrective_column_index = vertical.GetColumnIndices().find_next(corrective_column_index)) { - auto corrective_column = *GetColumn(corrective_column_index); + auto corrective_column = GetColumn(corrective_column_index); auto corrected_member = invalid_member.Union(static_cast(corrective_column)); diff --git a/src/core/model/table/relational_schema.h b/src/core/model/table/relational_schema.h index 9680aed401..8808cab6f7 100644 --- a/src/core/model/table/relational_schema.h +++ b/src/core/model/table/relational_schema.h @@ -5,8 +5,6 @@ #pragma once -#include -#include #include #include #include @@ -15,45 +13,41 @@ #include #include -#include "bitset_utils.h" - -class Column; - -class Vertical; +#include "model/table/column.h" +#include "model/table/idataset_stream.h" +#include "model/table/vertical.h" +#include "util/bitset_utils.h" class RelationalSchema { private: - std::vector> columns_; + std::vector columns_; std::string name_; public: std::unique_ptr empty_vertical_; - RelationalSchema(std::string name); + RelationalSchema(std::string name, std::vector column_names); RelationalSchema(RelationalSchema const& other) = delete; RelationalSchema& operator=(RelationalSchema const& rhs) = delete; RelationalSchema(RelationalSchema&& other) noexcept = default; RelationalSchema& operator=(RelationalSchema&& rhs) noexcept = default; - void Init(); + static std::unique_ptr CreateFrom(model::IDatasetStream& table); std::string GetName() const { return name_; } - std::vector> const& GetColumns() const { + std::vector const& GetColumns() const { return columns_; }; - Column const* GetColumn(std::string const& col_name) const; - Column const* GetColumn(size_t index) const; + Column const& GetColumn(std::string const& col_name) const; + Column const& GetColumn(size_t index) const; size_t GetNumColumns() const; Vertical GetVertical(boost::dynamic_bitset<> indices) const; - void AppendColumn(std::string const& col_name); - void AppendColumn(Column column); - template boost::dynamic_bitset<> IndicesToBitset(Container const& indices) const; template diff --git a/src/core/model/table/vertical.cpp b/src/core/model/table/vertical.cpp index 5817a0abaf..562d6450c2 100644 --- a/src/core/model/table/vertical.cpp +++ b/src/core/model/table/vertical.cpp @@ -2,6 +2,8 @@ #include +#include "model/table/relational_schema.h" + Vertical::Vertical(RelationalSchema const* rel_schema, boost::dynamic_bitset<> indices) : column_indices_(std::move(indices)), schema_(rel_schema) {} @@ -78,7 +80,7 @@ std::vector Vertical::GetColumns() const { std::vector columns; for (size_t index = column_indices_.find_first(); index != boost::dynamic_bitset<>::npos; index = column_indices_.find_next(index)) { - columns.push_back(schema_->GetColumns()[index].get()); + columns.push_back(&schema_->GetColumns()[index]); } return columns; } @@ -87,7 +89,7 @@ std::vector Vertical::GetColumnIndicesAsVector() const { std::vector columns; for (size_t index = column_indices_.find_first(); index != boost::dynamic_bitset<>::npos; index = column_indices_.find_next(index)) { - columns.push_back(schema_->GetColumns()[index].get()->GetIndex()); + columns.push_back(schema_->GetColumns()[index].GetIndex()); } return columns; } @@ -99,7 +101,7 @@ std::string Vertical::ToString() const { for (size_t index = column_indices_.find_first(); index != boost::dynamic_bitset<>::npos; index = column_indices_.find_next(index)) { - result += schema_->GetColumn(index)->GetName(); + result += schema_->GetColumn(index).GetName(); if (column_indices_.find_next(index) != boost::dynamic_bitset<>::npos) { result += ' '; } diff --git a/src/core/model/table/vertical.h b/src/core/model/table/vertical.h index 672622b50e..4b8381f74d 100644 --- a/src/core/model/table/vertical.h +++ b/src/core/model/table/vertical.h @@ -13,6 +13,8 @@ #include "column.h" +class RelationalSchema; + class Vertical { private: // Vertical(shared_ptr& relSchema, int indices); diff --git a/src/python_bindings/py_util/dataframe_reader.cpp b/src/python_bindings/py_util/dataframe_reader.cpp index 3013df0b57..70fa5fc66e 100644 --- a/src/python_bindings/py_util/dataframe_reader.cpp +++ b/src/python_bindings/py_util/dataframe_reader.cpp @@ -11,10 +11,8 @@ #include "model/types/builtin.h" -namespace python_bindings { - +namespace { namespace py = pybind11; - static std::vector GetColumnNames(py::handle dataframe) { std::vector names; py::list name_lst = dataframe.attr("columns").attr("to_list")(); @@ -23,12 +21,15 @@ static std::vector GetColumnNames(py::handle dataframe) { } return names; } +} // namespace + +namespace python_bindings { DataframeReaderBase::DataframeReaderBase(py::handle dataframe, std::string name) : dataframe_(py::reinterpret_borrow(dataframe)), df_iter_(dataframe_.attr("itertuples")(false, py::none{})), name_(std::move(name)), - column_names_(GetColumnNames(dataframe_)) {} + column_names_(::GetColumnNames(dataframe_)) {} void DataframeReaderBase::Reset() { df_iter_ = dataframe_.attr("itertuples")(false, py::none{}); diff --git a/src/tests/test_cfd_algos.cpp b/src/tests/test_cfd_algos.cpp index 809ffe803e..2559f2c14d 100644 --- a/src/tests/test_cfd_algos.cpp +++ b/src/tests/test_cfd_algos.cpp @@ -28,7 +28,7 @@ class CFDAlgorithmTest : public ::testing::Test { protected: static std::unique_ptr CreateAlgorithmInstance( CSVConfig const& csv_config, unsigned minsup, double minconf, char const* substrategy, - unsigned int max_lhs, unsigned columns_number = 0, unsigned tuples_number = 0) { + unsigned int max_lhs, size_t columns_number = 0, size_t tuples_number = 0) { using namespace config::names; algos::StdParamsMap params{ diff --git a/src/tests/test_typo_miner.cpp b/src/tests/test_typo_miner.cpp index 856a7df8c7..6f09cbe9f4 100644 --- a/src/tests/test_typo_miner.cpp +++ b/src/tests/test_typo_miner.cpp @@ -279,7 +279,7 @@ TEST_P(LinesWithTyposMiningTest, FindLinesWithTypos) { assert(fd_by_indices.size() > 1); auto bitset = schema->IndicesToBitset(fd_by_indices.cbegin(), std::prev(fd_by_indices.cend())); - FD fd(schema->GetVertical(std::move(bitset)), *schema->GetColumn(fd_by_indices.back())); + FD fd(schema->GetVertical(std::move(bitset)), schema->GetColumn(fd_by_indices.back())); for (auto const& [cluster, typos] : clusters_with_typos) { std::vector const actual = typo_miner->FindLinesWithTypos(fd, cluster, p.radius, p.ratio);