diff --git a/README.md b/README.md index 043e459..f598a4c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,9 @@ 由于比赛中写的架构较为简略,所以在这里重构一下这个kv引擎。 smallkv 是一个列存的、基于LSM架构的存储引擎。 -项目正在疯狂迭代中!! +**项目正在疯狂迭代中!!** + +--- ## 进度 - [ ] memtable: 跳表 @@ -25,10 +27,10 @@ smallkv 是一个列存的、基于LSM架构的存储引擎。 - [ ] 写流程 - [ ] Compaction模块 - +--- ## build -必须使用g++编译器 +You must build with the g++ compiler on Ubuntu 22.04. ### build from source code: ```shell # 安装依赖 @@ -37,7 +39,7 @@ apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \ && git clone https://github.com/google/googletest && cd googletest && mkdir build && cd build && cmake .. && make -j && sudo make install && cd ~ \ && git clone https://github.com/nlohmann/json && cd json && mkdir build && cd build && cmake .. && make -j && sudo make install && cd ~ \ && rm -rf spdlog googletest json -./build.sh ## 编译 +./build.sh ## 编译 ./main_run.sh ## 主程序 ./unittest_run.sh ## 单元测试 ``` @@ -46,26 +48,30 @@ apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \ cd docker docker build -t smallkv-testenv . # 需要几分钟 docker run -it -v /{smallkv代码所在的目录}:/test smallkv-testenv /bin/bash -./build.sh ## 编译 +./build.sh ## 编译 ./main_run.sh ## 主程序 ./unittest_run.sh ## 单元测试 ``` - +--- ## 设计 -1. 内存池设计 - +1. **内存池设计** ![mem_pool](./img/mem_pool_design.png) -2. 缓存设计 +2. **缓存设计** ![cache](./img/cache_design.png) -Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。 +Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。 + +3. **SStable设计** +todo +--- ## 第三方依赖: 1. [spdlog](https://github.com/gabime/spdlog) 2. [gtest](https://github.com/google/googletest) 3. [nlohmann/json](https://github.com/nlohmann/json) +--- ## 参考: 1. 
[阿里云NewSQL数据库大赛](https://tianchi.aliyun.com/competition/entrance/531980/introduction) diff --git a/src/file/file_reader.h b/src/file/file_reader.h index ecdcc7f..0d1268b 100644 --- a/src/file/file_reader.h +++ b/src/file/file_reader.h @@ -14,7 +14,8 @@ namespace smallkv { ~FileReader(); // 类似unistd中的pread - DBStatus read(void *buf, int32_t count, int32_t offset) const; + // 从offset处开始,读取count长度的内容到buf中 + DBStatus read(void *buf, int32_t count, int32_t offset = 0) const; private: int fd{0}; diff --git a/src/table/data_block_builder.cpp b/src/table/data_block_builder.cpp index 4699032..bbc6082 100644 --- a/src/table/data_block_builder.cpp +++ b/src/table/data_block_builder.cpp @@ -77,6 +77,7 @@ namespace smallkv { utils::PutFixed32(_data, _record_num); utils::PutFixed64(_data, *reinterpret_cast(&_offsetInfo)); } + // 重启点数量 utils::PutFixed32(_data, restart_point_num); return Status::Success; } diff --git a/src/table/footer_builder.h b/src/table/footer_builder.h index 20f541e..fbe7a73 100644 --- a/src/table/footer_builder.h +++ b/src/table/footer_builder.h @@ -2,6 +2,7 @@ // Created by qianyy on 2023/1/19. 
// #include +#include "log/log.h" #include "db/offset_info.h" #include "db/status.h" @@ -24,6 +25,8 @@ namespace smallkv { // OffsetInfo index_block_offset_info; std::string _data; + std::shared_ptr logger = log::get_logger(); // logger + public: FooterBuilder() = default; diff --git a/src/table/sstable_builder.cpp b/src/table/sstable_builder.cpp index 96d070b..a9d970d 100644 --- a/src/table/sstable_builder.cpp +++ b/src/table/sstable_builder.cpp @@ -21,6 +21,8 @@ namespace smallkv { dataBlockBuilder = std::make_shared(); indexBlockBuilder = std::make_shared(); filterBlockBuilder = std::make_shared(keys_num, false_positive); + footerBuilder = std::make_shared(); + logger = log::get_logger(); } DBStatus SSTableBuilder::add(const std::string &key, const std::string &val) { @@ -105,6 +107,7 @@ namespace smallkv { dataBlockBuilder->clear(); // 持久化完成后,清空当前dataBlockBuilder } + filterBlockBuilder->finish_filter_block(); // 保存FilterBlock的size FilterBlock_offset.size = static_cast(filterBlockBuilder->data().size()); @@ -127,13 +130,16 @@ namespace smallkv { // 持久化 fileWriter->flush(); + fileWriter->close(); // 清空字段 dataBlockBuilder->clear(); indexBlockBuilder->clear(); filterBlockBuilder = std::make_shared( _keys_num, _false_positive); - //todo : fileWriter字段如何清空??这里存在设计缺陷!! + // todo : fileWriter字段如何清空??这里存在设计缺陷!! 
+ // 关于上面todo的解释:每个.sst文件只会放一个SSTable,所以在SSTableBuilder外部建一个FileWriter指针, + // 然后传给SSTableBuilder来创建这个文件,创建完成后,这个FileWriter指针应该会自动销毁 footerBuilder->clear(); FilterBlock_offset.clear(); diff --git a/src/table/sstable_builder.h b/src/table/sstable_builder.h index d294f79..b527378 100644 --- a/src/table/sstable_builder.h +++ b/src/table/sstable_builder.h @@ -4,8 +4,9 @@ #include #include #include -#include "../db/status.h" -#include "../db/offset_info.h" +#include "log/log.h" +#include "db/status.h" +#include "db/offset_info.h" #ifndef SMALLKV_SSTABLE_BUILDER_H #define SMALLKV_SSTABLE_BUILDER_H @@ -56,6 +57,8 @@ namespace smallkv { std::shared_ptr fileWriter = nullptr; // 操作SST的落盘 std::shared_ptr footerBuilder = nullptr; // 操作Footer + std::shared_ptr logger; // logger + // 分别保存FilterBlock、IndexBlock的offset信息,然后保存在Footer中 OffsetInfo FilterBlock_offset{0, 0}; OffsetInfo IndexBlock_offset{0, 0}; diff --git a/tests/test_file_writer.cpp b/tests/test_file_writer.cpp index 7759825..c318f89 100644 --- a/tests/test_file_writer.cpp +++ b/tests/test_file_writer.cpp @@ -17,9 +17,9 @@ namespace smallkv { std::string data1 = "123gfds4h6.1 s0 3ds 4g00 x_"; std::string data2 = " 7112gdf455 4 56 u 455u 123fgh12fgn "; std::string data3 = ".*-123 34fe r65 4r1 0m1 j0.10,/,."; - fwriter->append(data1.data(), data1.size()); - fwriter->append(data2.data(), data2.size()); - fwriter->append(data3.data(), data3.size()); + fwriter->append(data1.data(), static_cast(data1.size())); + fwriter->append(data2.data(), static_cast(data2.size())); + fwriter->append(data3.data(), static_cast(data3.size())); fwriter->sync(); fwriter->close(); auto _ = std::system(("rm -rf " + path).c_str()); diff --git a/tests/test_sstable_builder.cpp b/tests/test_sstable_builder.cpp new file mode 100644 index 0000000..91c8900 --- /dev/null +++ b/tests/test_sstable_builder.cpp @@ -0,0 +1,157 @@ +// +// Created by qianyy on 2023/1/17. 
+// +#include +#include +#include "../src/table/sstable_builder.h" +#include "../src/file/file_writer.h" +#include "../src/file/file_reader.h" +#include "../src/utils/codec.h" + +namespace smallkv { + /* + * SST结构 + * SSTable的数据排布schema: + * +----------------+ + * | DataBlock_1 | + * +----------------+ + * | ... | + * +----------------+ + * | DataBlock_N | + * +----------------+ + * | MetaBlock | + * +----------------+ + * | IndexBlock_1 | + * +----------------+ + * | ... | + * +----------------+ + * | IndexBlock_N | + * +----------------+ + * | Footer | + * +----------------+ + * + * DataBlock数据排布如下: + * +---------------+ + * | Record_1 | + * +---------------+ + * | ... | + * +---------------+ + * | Record_N | + * +---------------+ + * | Restart_1 | + * +---------------+ + * | ... | + * +---------------+ + * | Restart_K | + * +---------------+ + * | Restart_Num | + * +---------------+ + * | Restart_Offset| + * +---------------+ + * + * Record的schema: + * +--------------------+----------------------+---------------+----------------------+---------------+ + * | shared_key_len(4B) | unshared_key_len(4B) | value_len(4B) | unshared_key_content | value_content | + * +--------------------+----------------------+---------------+----------------------+---------------+ + * + * Restart_Point的schema如下: + * +----------------+----------------+ + * | record_num(4B) | OffsetInfo(8B) | + * +----------------+----------------+ + * + * IndexBlock schema + * +------------------------+---------------+-----------------+ + * | _shortest_key_size(4B) | _shortest_key | _offsetInfo(8B) | + * +------------------------+---------------+-----------------+ + * + * Footer模块数据schema如下所示: + * +---------------------------+----------------------------+ + * | MetaBlock_OffsetInfo (8B) | IndexBlock_OffsetInfo (8B) | + * +---------------------------+----------------------------+ + * */ + TEST(SSTableBuilder, basic) { + /* + * 本例中: + * SSTable: + * 
+----------------------+------------------------+-----------------+-------+-------+ + * | shared_key_len=0(4B) | unshared_key_len=5(4B) | value_len=5(4B) | key_0 | val_0 | [DataBlock的Record] = 22B + * +----------------------+------------------------+-----------------+-------+-------+ + * +------------------+-----------------------------------+ + * | record_num=1(4B) | OffsetInfo={size=22,offset=0}(8B) | [DataBlock的RestartPoint] = 12B + * +------------------+-----------------------------------+ + * +---------------------+ + * | Restart_Num=1(4B) | [DataBlock的Restart_Num] = 4B + * +---------------------+ + * +---------------------+ + * | filter | [MetaBlock的Filter] = 124B + * +---------------------+ + * +--------------------------+---------------------+------------------------------------+ + * | _shortest_key_size=5(4B) | _shortest_key=key_1 | _offsetInfo={size=38,offset=0}(8B) | [IndexBlock] = 17B + * +--------------------------+---------------------+------------------------------------+ + * +------------------------------------------------+-------------------------------------------------+ + * | MetaBlock_OffsetInfo={size=124,offset=38} (8B) | IndexBlock_OffsetInfo={size=17,offset=162} (8B) | [Footer] = 16B + * +------------------------------------------------+-------------------------------------------------+ + * + * + * */ + auto logger = log::get_logger(); + const std::string path = "./build/test_SSTableBuilder.sst"; + auto _fileWriter = std::make_shared(path); + auto sstableBuilder = std::make_unique(100, _fileWriter, 0.01); + sstableBuilder->add("key_0", "val_0"); + sstableBuilder->finish_sst(); + + auto _fileReader = std::make_shared(path); + char buf[195]; + _fileReader->read(buf, 195, 0); + std::string buf_s(buf, 195); + + // 解析[DataBlock的Record] = 22B + auto shared_key_len = static_cast(utils::DecodeFixed32(buf)); + auto unshared_key_len = static_cast(utils::DecodeFixed32(buf + 4)); + auto value_len = static_cast(utils::DecodeFixed32(buf + 8)); + auto 
key_0 = buf_s.substr(12, 5); + auto val_0 = buf_s.substr(17, 5); + EXPECT_EQ(shared_key_len, 0); + EXPECT_EQ(unshared_key_len, 5); + EXPECT_EQ(value_len, 5); + EXPECT_EQ(key_0, "key_0"); + EXPECT_EQ(val_0, "val_0"); + + // 解析 [DataBlock的RestartPoint] = 12B + auto record_num = static_cast(utils::DecodeFixed32(buf + 22)); + auto Restart_Point_OffsetInfo = reinterpret_cast(buf + 26); + EXPECT_EQ(record_num, 1); + EXPECT_EQ(Restart_Point_OffsetInfo->size, 22); + EXPECT_EQ(Restart_Point_OffsetInfo->offset, 0); + + // 解析 [DataBlock的Restart_Num] = 4B + auto Restart_Num = static_cast(utils::DecodeFixed32(buf + 34)); + EXPECT_EQ(Restart_Num, 1); + + // 解析 filter + // key_num设置为100, false_positive=0.01的情况下: + // bits_array的长度为120,并且结尾有一个4B存储了哈希函数数量(为6),共124B + auto hash_func_num = static_cast(utils::DecodeFixed32(buf + 158)); + EXPECT_EQ(hash_func_num, 6); + + // 解析 [IndexBlock] = 17B + auto _shortest_key_size = static_cast(utils::DecodeFixed32(buf + 162)); + auto _shortest_key = buf_s.substr(166, 5); + auto IndexBlock_offsetInfo = reinterpret_cast(buf + 171); + EXPECT_EQ(_shortest_key_size, 5); + EXPECT_EQ(_shortest_key, "key_1"); + EXPECT_EQ(IndexBlock_offsetInfo->size, 38); + EXPECT_EQ(IndexBlock_offsetInfo->offset, 0); + + // 解析 [Footer] = 16B + auto MetaBlock_OffsetInfo = reinterpret_cast(buf + 179); + auto IndexBlock_OffsetInfo = reinterpret_cast(buf + 179 + 8); + EXPECT_EQ(MetaBlock_OffsetInfo->size, 124); + EXPECT_EQ(MetaBlock_OffsetInfo->offset, 38); + EXPECT_EQ(IndexBlock_OffsetInfo->size, 17); + EXPECT_EQ(IndexBlock_OffsetInfo->offset, 162); + + auto _ = system("rm -rf ./build/test_SSTableBuilder.sst"); + } +} \ No newline at end of file