Skip to content

Commit

Permalink
Fall 2024 Project 3 (#762)
Browse files Browse the repository at this point in the history
* feat(p3): introduce external merge sort & remove several executors

* new line at eof

* refactor OrderBy

* introduce external merge sort executor

* introduce tuple comparator

* add comments for 2-way merge sort requirement

* make sort plan node format as external merge sort

* fix lint

* sync private for test sort

* sync private

* update submission files

* update p4 submission files

* initializer -> constructor

* update tests and shared ptr for IndexInfo and TableInfo

* update TxnMgrDbg calls

* fix format errors

* sync private

* sync private

* sync private

* redistribute points

* rename leaderboard test file

---------

Co-authored-by: Yash Kothari <[email protected]>
  • Loading branch information
xx01cyx and yashkothari42 authored Oct 28, 2024
1 parent 1f1a8a0 commit 2dd141e
Show file tree
Hide file tree
Showing 34 changed files with 459 additions and 211 deletions.
18 changes: 6 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -324,37 +324,33 @@ set(P3_FILES
"src/include/execution/executors/index_scan_executor.h"
"src/include/execution/executors/insert_executor.h"
"src/include/execution/executors/limit_executor.h"
"src/include/execution/executors/nested_index_join_executor.h"
"src/include/execution/executors/nested_loop_join_executor.h"
"src/include/execution/executors/seq_scan_executor.h"
"src/include/execution/executors/sort_executor.h"
"src/include/execution/executors/topn_executor.h"
"src/include/execution/executors/topn_per_group_executor.h"
"src/include/execution/executors/external_merge_sort_executor.h"
"src/include/execution/executors/update_executor.h"
"src/include/execution/executors/window_function_executor.h"
"src/execution/aggregation_executor.cpp"
"src/execution/window_function_executor.cpp"
"src/execution/delete_executor.cpp"
"src/execution/filter_executor.cpp"
"src/execution/hash_join_executor.cpp"
"src/execution/index_scan_executor.cpp"
"src/execution/insert_executor.cpp"
"src/execution/limit_executor.cpp"
"src/execution/nested_index_join_executor.cpp"
"src/execution/nested_loop_join_executor.cpp"
"src/execution/seq_scan_executor.cpp"
"src/execution/sort_executor.cpp"
"src/execution/topn_executor.cpp"
"src/execution/topn_per_group_executor.cpp"
"src/execution/external_merge_sort_executor.cpp"
"src/execution/update_executor.cpp"
"src/include/execution/execution_common.h"
"src/include/optimizer/optimizer.h"
"src/include/optimizer/optimizer_internal.h"
"src/execution/execution_common.cpp"
"src/optimizer/nlj_as_hash_join.cpp"
"src/optimizer/optimizer_custom_rules.cpp"
"src/optimizer/sort_limit_as_topn.cpp"
"src/optimizer/optimizer_internal.cpp"
"src/optimizer/seqscan_as_indexscan.cpp"
"src/optimizer/column_pruning.cpp"
"src/common/bustub_ddl.cpp"
"src/include/execution/plans/topn_per_group_plan.h"
${P2_FILES}
)

Expand All @@ -375,8 +371,6 @@ set(P4_FILES
"src/concurrency/transaction_manager.cpp"
"src/include/concurrency/watermark.h"
"src/concurrency/watermark.cpp"
"src/include/execution/execution_common.h"
"src/execution/execution_common.cpp"
${P3_FILES}
)

Expand Down
2 changes: 1 addition & 1 deletion src/catalog/table_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ auto TableGenerator::MakeValues(ColumnInsertMeta *col_meta, uint32_t count) -> s
}
}

void TableGenerator::FillTable(TableInfo *info, TableInsertMeta *table_meta) {
void TableGenerator::FillTable(const std::shared_ptr<TableInfo> &info, TableInsertMeta *table_meta) {
uint32_t num_inserted = 0;
uint32_t batch_size = 128;
while (num_inserted < table_meta->num_rows_) {
Expand Down
4 changes: 2 additions & 2 deletions src/common/bustub_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace bustub {
void BusTubInstance::HandleCreateStatement(Transaction *txn, const CreateStatement &stmt, ResultWriter &writer) {
std::unique_lock<std::shared_mutex> l(catalog_lock_);
auto info = catalog_->CreateTable(txn, stmt.table_, Schema(stmt.columns_));
IndexInfo *index = nullptr;
std::shared_ptr<IndexInfo> index = nullptr;
if (!stmt.primary_key_.empty()) {
std::vector<uint32_t> col_ids;
for (const auto &col : stmt.primary_key_) {
Expand Down Expand Up @@ -106,7 +106,7 @@ void BusTubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
}

std::unique_lock<std::shared_mutex> l(catalog_lock_);
IndexInfo *info = nullptr;
std::shared_ptr<IndexInfo> info = nullptr;

if (stmt.index_type_.empty()) {
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
Expand Down
6 changes: 3 additions & 3 deletions src/common/bustub_instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ void BusTubInstance::CmdDbgMvcc(const std::vector<std::string> &params, ResultWr
writer.OneCell("table " + table + " not found");
return;
}
TxnMgrDbg("\\dbgmvcc", txn_manager_.get(), table_info, table_info->table_.get());
TxnMgrDbg("\\dbgmvcc", txn_manager_.get(), table_info.get(), table_info->table_.get());
}

void BusTubInstance::CmdDisplayTables(ResultWriter &writer) {
Expand All @@ -175,7 +175,7 @@ void BusTubInstance::CmdDisplayTables(ResultWriter &writer) {
writer.EndHeader();
for (const auto &name : table_names) {
writer.BeginRow();
const auto *table_info = catalog_->GetTable(name);
const auto table_info = catalog_->GetTable(name);
writer.WriteCell(fmt::format("{}", table_info->oid_));
writer.WriteCell(table_info->name_);
writer.WriteCell(table_info->schema_.ToString());
Expand All @@ -194,7 +194,7 @@ void BusTubInstance::CmdDisplayIndices(ResultWriter &writer) {
writer.WriteHeaderCell("index_cols");
writer.EndHeader();
for (const auto &table_name : table_names) {
for (const auto *index_info : catalog_->GetTableIndexes(table_name)) {
for (const auto &index_info : catalog_->GetTableIndexes(table_name)) {
writer.BeginRow();
writer.WriteCell(table_name);
writer.WriteCell(fmt::format("{}", index_info->index_oid_));
Expand Down
1 change: 1 addition & 0 deletions src/execution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_library(
aggregation_executor.cpp
delete_executor.cpp
execution_common.cpp
external_merge_sort_executor.cpp
executor_factory.cpp
filter_executor.cpp
fmt_impl.cpp
Expand Down
13 changes: 13 additions & 0 deletions src/execution/execution_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@

namespace bustub {

/**
 * Constructs a comparator from a list of OrderBy entries.
 * The list is taken by value and moved into `order_bys_`, so the comparator owns its own copy.
 * @param order_bys the OrderBy entries to store in this comparator
 */
TupleComparator::TupleComparator(std::vector<OrderBy> order_bys) : order_bys_(std::move(order_bys)) {}

/**
 * Placeholder comparison of two sort entries: as written it ignores both arguments and always returns false.
 * NOTE(review): presumably meant to order entries according to `order_bys_` once implemented — confirm
 * against the project specification.
 * @param entry_a first entry (currently unused)
 * @param entry_b second entry (currently unused)
 * @return always false in this skeleton
 */
auto TupleComparator::operator()(const SortEntry &entry_a, const SortEntry &entry_b) const -> bool { return false; }

/**
 * Placeholder sort-key generator: as written it ignores all three arguments and returns an
 * empty-brace-initialized SortKey.
 * NOTE(review): presumably meant to derive the key from `tuple` using `order_bys` and `schema` once
 * implemented — confirm against the project specification.
 * @param tuple the tuple to build a key for (currently unused)
 * @param order_bys the OrderBy entries (currently unused)
 * @param schema the schema of `tuple` (currently unused)
 * @return an empty-initialized SortKey in this skeleton
 */
auto GenerateSortKey(const Tuple &tuple, const std::vector<OrderBy> &order_bys, const Schema &schema) -> SortKey {
return {};
}

/**
* Everything above is all you need for P3.
* You can ignore the rest of this file until P4.
*/

auto ReconstructTuple(const Schema *schema, const Tuple &base_tuple, const TupleMeta &base_meta,
const std::vector<UndoLog> &undo_logs) -> std::optional<Tuple> {
UNIMPLEMENTED("not implemented");
Expand Down
3 changes: 2 additions & 1 deletion src/execution/executor_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "execution/executors/abstract_executor.h"
#include "execution/executors/aggregation_executor.h"
#include "execution/executors/delete_executor.h"
#include "execution/executors/external_merge_sort_executor.h"
#include "execution/executors/filter_executor.h"
#include "execution/executors/hash_join_executor.h"
#include "execution/executors/index_scan_executor.h"
Expand Down Expand Up @@ -166,7 +167,7 @@ auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPl
case PlanType::Sort: {
const auto *sort_plan = dynamic_cast<const SortPlanNode *>(plan.get());
auto child = ExecutorFactory::CreateExecutor(exec_ctx, sort_plan->GetChildPlan());
return std::make_unique<SortExecutor>(exec_ctx, sort_plan, std::move(child));
return std::make_unique<ExternalMergeSortExecutor<2>>(exec_ctx, sort_plan, std::move(child));
}

// Create a new topN executor
Expand Down
39 changes: 39 additions & 0 deletions src/execution/external_merge_sort_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===----------------------------------------------------------------------===//
//
// BusTub
//
// external_merge_sort_executor.cpp
//
// Identification: src/execution/external_merge_sort_executor.cpp
//
// Copyright (c) 2015-2024, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include "execution/executors/external_merge_sort_executor.h"
#include <iostream>
#include <optional>
#include <vector>
#include "common/config.h"
#include "execution/plans/sort_plan.h"

namespace bustub {

/**
 * Constructs the external merge sort executor skeleton.
 * The initializer list forwards `exec_ctx` to AbstractExecutor and builds `cmp_` from the plan's
 * order-by list (`plan->GetOrderBy()`). Neither `plan` nor `child_executor` is stored here.
 * @param exec_ctx the executor context passed to the base class
 * @param plan the sort plan node; only its order-by list is read
 * @param child_executor the child executor (currently unused by this skeleton)
 */
template <size_t K>
ExternalMergeSortExecutor<K>::ExternalMergeSortExecutor(ExecutorContext *exec_ctx, const SortPlanNode *plan,
std::unique_ptr<AbstractExecutor> &&child_executor)
: AbstractExecutor(exec_ctx), cmp_(plan->GetOrderBy()) {}

/**
 * Skeleton Init(): not implemented yet.
 * @throws NotImplementedException always, with the message "ExternalMergeSortExecutor is not implemented"
 */
template <size_t K>
void ExternalMergeSortExecutor<K>::Init() {
throw NotImplementedException("ExternalMergeSortExecutor is not implemented");
}

/**
 * Skeleton Next(): as written it writes nothing to `tuple` or `rid` and always returns false.
 * @param[out] tuple output tuple (untouched by this skeleton)
 * @param[out] rid output record id (untouched by this skeleton)
 * @return always false in this skeleton
 */
template <size_t K>
auto ExternalMergeSortExecutor<K>::Next(Tuple *tuple, RID *rid) -> bool {
return false;
}

// Explicit instantiation for K = 2. The member definitions above live in this .cpp file, so the
// specializations used by other translation units must be instantiated here.
template class ExternalMergeSortExecutor<2>;

} // namespace bustub
4 changes: 3 additions & 1 deletion src/execution/fmt_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ auto UpdatePlanNode::PlanNodeToString() const -> std::string {
}

/**
 * Renders this sort plan node as a string containing its order-by list.
 * @return a string of the form `ExternalMergeSort { order_bys=... }`
 */
auto SortPlanNode::PlanNodeToString() const -> std::string {
  // Note(f24): A sort plan node will be converted to an external merge sort executor in
  // Fall 2024. So `ExternalMergeSort` is returned instead of `Sort`.
  // The stale `Sort { ... }` return that preceded this note has been removed: it ran first and
  // left the statement below unreachable, contradicting the note.
  return fmt::format("ExternalMergeSort {{ order_bys={} }}", order_bys_);
}

/**
 * Renders this limit plan node as a string of the form `Limit { limit=<limit_> }`.
 * The doubled braces in the format string are fmt escapes for literal `{` and `}`.
 */
auto LimitPlanNode::PlanNodeToString() const -> std::string { return fmt::format("Limit {{ limit={} }}", limit_); }
Expand Down
2 changes: 1 addition & 1 deletion src/execution/hash_join_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ HashJoinExecutor::HashJoinExecutor(ExecutorContext *exec_ctx, const HashJoinPlan
std::unique_ptr<AbstractExecutor> &&right_child)
: AbstractExecutor(exec_ctx) {
if (!(plan->GetJoinType() == JoinType::LEFT || plan->GetJoinType() == JoinType::INNER)) {
// Note for 2023 Fall: You ONLY need to implement left join and inner join.
// Note for Fall 2024: You ONLY need to implement left join and inner join.
throw bustub::NotImplementedException(fmt::format("join type {} not supported", plan->GetJoinType()));
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/include/binder/bound_order_by.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "binder/bound_expression.h"
#include "common/exception.h"
#include "execution/expressions/abstract_expression.h"
#include "fmt/format.h"

namespace bustub {
Expand All @@ -28,6 +29,8 @@ enum class OrderByType : uint8_t {
DESC = 3, /**< Descending order by type. */
};

/**
 * A single order-by entry: an OrderByType paired with an expression reference
 * (presumably the expression whose value is sorted on — confirm against callers).
 */
using OrderBy = std::pair<OrderByType, AbstractExpressionRef>;

/**
* BoundOrderBy is an item in the ORDER BY clause.
*/
Expand Down
Loading

0 comments on commit 2dd141e

Please sign in to comment.