diff --git a/.gitignore b/.gitignore index a9c1fac..16179e5 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,7 @@ test_temp # others ref -build/* \ No newline at end of file +build/* + +storage +db_storage \ No newline at end of file diff --git a/README.md b/README.md index c8ec55f..76f3cce 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ smallkv 是一个列存的、基于LSM架构的存储引擎。 **项目正在疯狂迭代中!!** --- + ## 进度 - [x] 跳表 @@ -27,13 +28,18 @@ smallkv 是一个列存的、基于LSM架构的存储引擎。 - [ ] 读流程 - [ ] 写流程 - [ ] Compaction模块 +- [ ] 用FreeListAllocate(src/memory/allocate.h)替换系统内存分配器 --- + ## BUILD -You must use the g++ compiler and Ubuntu 22.04 system. +You must use the g++ compiler(with C++ 17 supported) and Ubuntu 22.04 system. + ### build from docker (Highly recommended) + ```shell +git clone git@github.com:yangyang233333/smallkv.git docker pull qianyy2333/smallkv-test docker run -it -v /{smallkv代码所在的目录}:/test qianyy2333/smallkv-test /bin/bash ./build.sh ## 编译 @@ -42,6 +48,7 @@ docker run -it -v /{smallkv代码所在的目录}:/test qianyy2333/smallkv-test ``` ### build from source code: + ```shell # 安装依赖 apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \ @@ -50,40 +57,60 @@ apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \ && git clone https://github.com/nlohmann/json && cd json && mkdir build && cd build && cmake .. && make -j && sudo make install && cd ~ \ && git clone https://github.com/abseil/abseil-cpp.git && cd abseil-cpp && mkdir build && cd build && cmake .. && make -j && make install && cd ~ \ && rm -rf spdlog googletest json +git clone git@github.com:yangyang233333/smallkv.git +cd smallkv ./build.sh ## 编译 ./main_run.sh ## 主程序 ./unittest_run.sh ## 单元测试 ``` --- + ## 设计 + ### 1. **内存池设计** + ![mem_pool](./img/mem_pool_design.png) ### 2. 
**缓存设计** + ![cache](./img/cache_design.png) -Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。 +Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。 + +### 3. **SSTable设计** -### 3. **SSTable设计** 每个.sst文件存储一个SSTable结构,SSTable结构如下所示: ![sstable_schema](./img/sstable.png) -下面细说每个模块的内容: -- #### 3.1 DataBlock +下面细说每个模块的内容: + +- #### 3.1 DataBlock + ![data_block_schema](./img/data_block_schema.png) - 1)上图中,每个Record存储了具体的KV数据,并且记录了连续的Key的共享长度(为了差值压缩); - 2)Restart主要用来进行二分查找,根据Restart中记录的offset信息可以解析出对应的Record Group中最小的Key,通过比对连续的Restart中的Key可以快速定位K-V pair,每个Restart记录了一个Record Group中的Record数量,以及对应的size和offset,每个Restart长度为12字节; - 3)Restart_NUM记录了Restart的数量; - 4)Restart_Offset记录了Restart的size和offset信息; -- #### 3.2 MetaBlock -MetaBlock中存储了Filter信息(位数组和哈希函数个数),也就是布隆过滤器的数据。为什么需要这个数据?因为sst是顺序append结构,所以写入很快(O(1)),但是查找非常慢(O(N)),于是需要一个布隆过滤器来对请求进行初步的过滤(可以过滤掉一定不存在的KV pair)。 -- #### 3.3 IndexBlock +1)上图中,每个Record存储了具体的KV数据,并且记录了连续的Key的共享长度(为了差值压缩); +2)Restart主要用来进行二分查找,根据Restart中记录的offset信息可以解析出对应的Record +Group中最小的Key,通过比对连续的Restart中的Key可以快速定位K-V pair,每个Restart记录了一个Record +Group中的Record数量,以及对应的size和offset,每个Restart长度为12字节; +3)Restart_NUM记录了Restart的数量; +4)Restart_Offset记录了Restart的size和offset信息; + +- #### 3.2 MetaBlock + +MetaBlock中存储了Filter信息(位数组和哈希函数个数),也就是布隆过滤器的数据。为什么需要这个数据?因为sst是顺序append结构,所以写入很快(O( +1)),但是查找非常慢(O(N)),于是需要一个布隆过滤器来对请求进行初步的过滤(可以过滤掉一定不存在的KV pair)。 + +- #### 3.3 IndexBlock + ![index_block_schema](./img/index_block_schema.png) -IndexBlock存储对应的DataBlock中的最大key信息(注意:实际存储的是shortest_key,并且shortest_key = min{shortest_key > 对应的DataBlock的最大key},这样可以减小比较次数,缓解高并发下的压力);Offset_Info存储了对应DataBlock的size和offset。 -- #### 3.4 Footer +IndexBlock存储对应的DataBlock中的最大key信息(注意:实际存储的是shortest_key,并且shortest_key = min{shortest_key > +对应的DataBlock的最大key},这样可以减小比较次数,缓解高并发下的压力);Offset_Info存储了对应DataBlock的size和offset。 + +- #### 3.4 Footer + ![footer_schema](./img/footer_schema.png) 
MetaBlock_OffsetInfo记录了MetaBlock的size和offset,IndexBlock_OffsetInfo记录了IndexBlock的offset(第一个IndexBlock的offset)和size(所有IndexBlock的总大小)。 --- + ## 第三方依赖: 1. [spdlog](https://github.com/gabime/spdlog) @@ -92,16 +119,19 @@ MetaBlock_OffsetInfo记录了MetaBlock的size和offset,IndexBlock_OffsetInfo 4. [abseil](https://github.com/abseil/abseil-cpp) --- -## 参考: + +## 有用的参考资料: 1. [阿里云NewSQL数据库大赛](https://tianchi.aliyun.com/competition/entrance/531980/introduction) 2. [corekv](https://github.com/hardcore-os/coreKV-CPP) 3. [leveldb](https://github.com/google/leveldb) 4. [LSM树原理](https://zhuanlan.zhihu.com/p/181498475) 5. [LSM Tree是什么?](https://www.zhihu.com/question/446544471/answer/2348883977) -6. [WAL](https://zhuanlan.zhihu.com/p/258091002) +6. [WAL](https://zhuanlan.zhihu.com/p/258091002) +7. [Linux I/O: fsync, fflush, fwrite, mmap](https://juejin.cn/post/7001665675907301412) --- 感谢 [JetBrains](https://jb.gg/OpenSourceSupport) 捐献的免费许可证帮助我们开发smallkv。 -Thanks to [JetBrains](https://jb.gg/OpenSourceSupport) for donating product licenses to help develop **smallkv** +Thanks to [JetBrains](https://jb.gg/OpenSourceSupport) for donating product licenses to help develop **smallkv +** diff --git a/img/linux_io.png b/img/linux_io.png new file mode 100644 index 0000000..dc4bb2f Binary files /dev/null and b/img/linux_io.png differ diff --git a/src/cache/cache.h b/src/cache/cache.h index 91f4d6b..2c352b7 100644 --- a/src/cache/cache.h +++ b/src/cache/cache.h @@ -50,6 +50,12 @@ namespace smallkv { return caches[sharding_index]->get(key); } + // 存在则返回true + bool contains(const K &key) { + uint64_t sharding_index = hash_fn(key) % SHARDING_NUM; + return caches[sharding_index]->contains(key); + } + // 释放节点(引用计数减一) void release(const K &key) { uint64_t sharding_index = hash_fn(key) % SHARDING_NUM; diff --git a/src/cache/cache_policy.h b/src/cache/cache_policy.h index e817a62..a4594a9 100644 --- a/src/cache/cache_policy.h +++ b/src/cache/cache_policy.h @@ -25,6 +25,9 @@ namespace smallkv { // 查 virtual 
Node *get(const K &key) = 0; + // 存在则返回true + virtual bool contains(const K &key) = 0; + // 释放节点(引用计数减一) virtual void release(const K &key) = 0; diff --git a/src/cache/lru.h b/src/cache/lru.h index 550bf51..fb647ed 100644 --- a/src/cache/lru.h +++ b/src/cache/lru.h @@ -156,6 +156,12 @@ namespace smallkv { return *(iter->second); } + // 存在则返回true + bool contains(const K &key) { + ScopedLock lock_guard(locker); + return index.find(key) != index.end(); + } + // 释放节点(引用计数减一) void release(const K &key) override { ScopedLock lock_guard(locker); diff --git a/src/db/db.cpp b/src/db/db.cpp new file mode 100644 index 0000000..3a18f23 --- /dev/null +++ b/src/db/db.cpp @@ -0,0 +1,40 @@ +// +// Created by qianyy on 2023/1/28. +// +#include "db.h" +#include "db_impl.h" + +namespace smallkv { + DB::DB(const Options &options) { + db_impl = std::make_unique(options); + } + + DBStatus DB::Put(const WriteOptions &options, + const std::string_view &key, + const std::string_view &value) { + return db_impl->Put(options, key, value); + } + + DBStatus DB::Delete(const WriteOptions &options, + const std::string_view &key) { + return db_impl->Delete(options, key); + } + + DBStatus DB::Get(const ReadOptions &options, + const std::string_view &key, + std::string *value) { + return db_impl->Get(options, key, value); + } + + DBStatus DB::BatchPut(const WriteOptions &options) { + return db_impl->BatchPut(options); + } + + DBStatus DB::BatchDelete(const ReadOptions &options) { + return db_impl->BatchDelete(options); + } + + DBStatus DB::Close() { + return db_impl->Close(); + } +} diff --git a/src/db/db.h b/src/db/db.h new file mode 100644 index 0000000..0b77838 --- /dev/null +++ b/src/db/db.h @@ -0,0 +1,49 @@ +// +// Created by qianyy on 2023/1/27. 
+// +#include +#include +#include "status.h" +#include "options.h" + +#ifndef SMALLKV_DB_H +#define SMALLKV_DB_H +namespace smallkv { + class DBImpl; + + class DB { + public: + explicit DB(const Options& options); + + ~DB() = default; + + // DB 应该是单例,禁止拷贝、赋值 + DB(const DB &) = delete; + + DB &operator=(const DB &) = delete; + + DBStatus Put(const WriteOptions &options, + const std::string_view &key, + const std::string_view &value); + + DBStatus Delete(const WriteOptions &options, + const std::string_view &key); + + // 将Key对应的值写到value地址上 + DBStatus Get(const ReadOptions &options, + const std::string_view &key, + std::string *value); + + // 批写 + DBStatus BatchPut(const WriteOptions &options); + + DBStatus BatchDelete(const ReadOptions &options); + + // 关闭数据库:调用此函数可以保证所有已写入数据会被持久化到磁盘, + DBStatus Close(); + + private: + std::unique_ptr db_impl; + }; +} +#endif //SMALLKV_DB_H diff --git a/src/db/db_impl.cpp b/src/db/db_impl.cpp new file mode 100644 index 0000000..7bb63bf --- /dev/null +++ b/src/db/db_impl.cpp @@ -0,0 +1,204 @@ +// +// Created by qianyy on 2023/1/28. +// + +#include + +#include "db_impl.h" +#include "cache/cache.h" +#include "utils/codec.h" +#include "memory/allocate.h" +#include "memtable/memtable.h" +#include "wal/wal_writer.h" +#include "file/file_writer.h" +#include "table/sstable_builder.h" + +namespace smallkv { + DBImpl::DBImpl(Options options) : options_(std::move(options)) { + alloc = std::make_shared(); + mem_table = std::make_shared(alloc); + logger = log::get_logger(); + auto file_writer_ = std::make_shared(options_.DB_DIR); + wal_writer = std::make_shared(file_writer_); + + cache = std::make_shared>(options_.CACHE_SIZE); + cache->register_clean_handle([](const std::string &key, std::string *val) { + delete val; + }); + } + + DBStatus DBImpl::Put(const WriteOptions &options, + const std::string_view &key, + const std::string_view &value) { + assert(closed == false); + /* + * 写逻辑: + * 1. 写WAL(fsync同步); + * 2. 写memtable; + * 3. 
写缓存(提高读性能);
+         * 4. 如果memtable超限,应该落盘,并且开启一个新的memtable;
+         *
+         * */
+        std::unique_lock wlock(rwlock_);
+
+        // 1. 写WAL
+        std::string buf(8 + key.size() + value.size(), '\0'); // owned buffer instead of a non-standard VLA
+        EncodeKV(key, value, buf.data()); // encode the K-V pair into buf
+        wal_writer->AddLog(buf); // bytes + length: a bare char* carries no size and the record embeds NUL bytes
+
+        // 2. 写memtable
+        if (mem_table->Contains(key)) { // Update
+            mem_table->Update(key, value);
+        } else { // New Insert
+            mem_table->Add(key, value);
+        }
+
+        // 3. 写缓存
+        // todo: 写入时候不一定需要写入缓存. 如果一次性写入大量数据,实际上不需要每次
+        //  都更新缓存,可以设置一种动态的、热点感知的缓存机制。后续有空优化。
+        // todo: 此处采用new std::string()性能很差,后续需要修改底层的cache接口。
+        cache->insert(std::string(key), new std::string(value)); // copy by size; string_view::data() need not be NUL-terminated
+
+        // 4. 判断MemTable是否超限, 如果超限应该转为L1SST后持久化
+        if (mem_table->GetMemUsage() >= options_.MEM_TABLE_MAX_SIZE) {
+            MemTableToSST(); // 将memtable转为sst
+
+            // 开启写的memtable
+            mem_table = std::make_shared(alloc);
+            logger->info("[DBImpl::Put] A new mem_table is created.");
+        }
+        return Status::Success;
+    }
+
+    DBStatus DBImpl::Delete(const WriteOptions &options,
+                            const std::string_view &key) {
+        assert(closed == false);
+        /*
+         * 删除逻辑:
+         * 1. 写WAL;
+         * 2. 写memtable;
+         * 3. 删除缓存;
+         * 4. 如果memtable超限,应该落盘,并且开启一个新的memtable;
+         * */
+        std::unique_lock wlock(rwlock_);
+
+        // 1. 写WAL
+        std::string buf(8 + key.size(), '\0'); // val_len == 0 encodes an empty value
+        EncodeKV(key, "", buf.data());
+        wal_writer->AddLog(buf); // pass the length along with the bytes
+
+        // 2. 写memtable
+        if (mem_table->Contains(key)) { // 原地标记val=""表示删除
+            mem_table->Delete(key);
+        } else {
+            mem_table->Add(key, ""); // 墓碑机制
+        }
+
+        // 3. 删除缓存
+        cache->erase(std::string(key)); // build the key from the view's size, not from a NUL scan
+
+        // 4. 检查memtable是否超限
+        if (mem_table->GetMemUsage() >= options_.MEM_TABLE_MAX_SIZE) {
+            MemTableToSST(); // 将memtable转为sst
+
+            // 开启写的memtable
+            mem_table = std::make_shared(alloc);
+            logger->info("[DBImpl::Delete] A new mem_table is created.");
+        }
+        return Status::Success;
+    }
+
+    DBStatus DBImpl::Get(const ReadOptions &options,
+                         const std::string_view &key,
+                         std::string *value) {
+        assert(closed == false);
+        /*
+         * 读逻辑:
+         * 1. 读缓存,有则直接返回,否则进入2;
+         * 2. 依次从memtable、sst文件向下查找;
+         * 3. 找到的数据写入缓存;
+         * 4. 
返回结果;
+         *
+         * */
+        std::shared_lock rlock(rwlock_);
+        const std::string owned_key(key); // sized copy: string_view::data() need not be NUL-terminated
+        // 1. 读缓存
+        if (cache->contains(owned_key)) {
+            *value = *(cache->get(owned_key)->val);
+            return Status::Success;
+        }
+
+        // 2. 读memtable
+        if (mem_table->Contains(key)) {
+            auto val = mem_table->Get(key);
+            *value = val.value(); // reuse the lookup above instead of searching the memtable twice
+            return Status::Success;
+        }
+
+        // 3. 依次读sst文件
+        // todo: 后续实现
+
+        // 4. 找到的数据写入缓存
+        // todo
+
+        // NOTE(review): a miss falls through and still reports Success with *value untouched - confirm intended status.
+        return Status::Success;
+    }
+
+    DBStatus DBImpl::BatchPut(const WriteOptions &options) {
+        std::unique_lock wlock(rwlock_);
+        assert(closed == false);
+        // todo: 稍后实现
+        return Status::NotImpl;
+    }
+
+    DBStatus DBImpl::BatchDelete(const ReadOptions &options) {
+        std::unique_lock wlock(rwlock_);
+        assert(closed == false);
+        // todo: 稍后实现
+        return Status::NotImpl;
+    }
+
+    void DBImpl::EncodeKV(const std::string_view &key,
+                          const std::string_view &value,
+                          char *buf) {
+        /*
+         * 暂时采用的编码方法如下:
+         * +-------------+-----+-------------+-----+
+         * | key_len(4B) | key | val_len(4B) | val |
+         * +-------------+-----+-------------+-----+
+         * todo: 存在优化空间,例如使用varint等,后续有空再说
+         *
+         * */
+        assert(value.size() < UINT32_MAX);
+        utils::EncodeFixed32(buf, key.size());
+        memcpy(buf + 4, key.data(), key.size());
+        utils::EncodeFixed32(buf + 4 + key.size(), value.size());
+        memcpy(buf + 4 + key.size() + 4, value.data(), value.size());
+    }
+
+    void DBImpl::MemTableToSST() {
+        // todo: 此处采用同步方法(为了debug方便),后续需要修改为异步
+
+        // 格式为/.../level_n_sst_i.sst
+        auto sst_filepath = options_.STORAGE_DIR + "/" + utils::BuildSSTPath(0, options_.LISST_NUM);
+        logger->info("DBImpl::MemTableToSST() is called. 
sst_filepath={}", sst_filepath);
+
+        auto file_writer = std::make_shared(sst_filepath);
+        auto sstable_builder = std::make_shared(mem_table->GetSize(), file_writer);
+        mem_table->ConvertToL1SST(sst_filepath, sstable_builder);
+
+        ++options_.LISST_NUM; // 下一个sst文件序号+1
+    }
+
+    DBStatus DBImpl::Close() {
+        std::unique_lock wlock(rwlock_); // same writer lock as Put/Delete: Close also mutates shared state
+        if (!closed && mem_table->GetSize() > 0) {
+            // Flush whatever is still buffered in the memtable to an SST file.
+            MemTableToSST();
+        }
+        closed = true; // record the close even when there was nothing to flush
+        logger->info("DB is closed.");
+        return Status::Success;
+    }
+}
diff --git a/src/db/db_impl.h b/src/db/db_impl.h
new file mode 100644
index 0000000..0f6394b
--- /dev/null
+++ b/src/db/db_impl.h
@@ -0,0 +1,80 @@
+//
+// Created by qianyy on 2023/1/28.
+//
+#include
+#include
+#include
+#include "status.h"
+#include "options.h"
+#include "log/log.h"
+
+#ifndef SMALLKV_DB_IMPL_H
+#define SMALLKV_DB_IMPL_H
+
+namespace smallkv {
+    template
+    class Cache;
+
+    class MemTable;
+
+    class WALWriter;
+
+    class FreeListAllocate;
+
+    /*
+     * 支持并发,线程安全
+     *
+     * */
+    class DBImpl {
+    public:
+        explicit DBImpl(Options options);
+
+        ~DBImpl() = default;
+
+        // 同时具备Set和Update语义
+        DBStatus Put(const WriteOptions &options,
+                     const std::string_view &key,
+                     const std::string_view &value);
+
+        DBStatus Delete(const WriteOptions &options,
+                        const std::string_view &key);
+
+        // 将Key对应的值写到value地址上
+        DBStatus Get(const ReadOptions &options,
+                     const std::string_view &key,
+                     std::string *value);
+
+        // 关闭数据库:调用此函数可以保证所有已写入数据会被持久化到磁盘,
+        DBStatus Close();
+
+        // 批写
+        DBStatus BatchPut(const WriteOptions &options);
+
+        DBStatus BatchDelete(const ReadOptions &options);
+
+    private:
+        // 将 KV 编码到 buf 中, 必须确保buf长度为8 + key.size() + value.size()
+        static void EncodeKV(const std::string_view &key,
+                             const std::string_view &value,
+                             char *buf);
+
+        // 将memtable转为sst
+        void MemTableToSST();
+
+    private:
+        std::shared_ptr mem_table; // active memtable
+        std::shared_ptr logger; // 日志
+        std::shared_ptr alloc; // 内存分配器
+        std::shared_ptr wal_writer; // 写wal
+
+        std::shared_ptr> cache; // 缓存
+
+        Options
options_; // 配置信息
+
+        std::shared_mutex rwlock_; // 读写锁
+
+        bool closed = false; // 表示数据库没有关闭
+    };
+}
+
+#endif //SMALLKV_DB_IMPL_H
diff --git a/src/db/options.h b/src/db/options.h
new file mode 100644
index 0000000..0c9a3a1
--- /dev/null
+++ b/src/db/options.h
@@ -0,0 +1,60 @@
+//
+// Created by qianyy on 2023/1/27.
+//
+#include
+
+#ifndef SMALLKV_OPTIONS_H
+#define SMALLKV_OPTIONS_H
+namespace smallkv {
+    // DB的配置信息,如是否开启同步、缓存池等
+    struct Options {
+        //todo: 之前的配置信息已经写到了xxx_config中,后续应该集中到这里
+
+        // 数据库的存储目录,需要自定义. 例如修改为:"/home/db_storage"
+        std::string DB_DIR = "/mnt/c/Users/abc/Desktop/smallkv_proj/smallkv/db_storage";
+
+        // MEM_TABLE的最大大小,超过了就应该落盘
+        size_t MEM_TABLE_MAX_SIZE = 4 * 1024 * 1024; // 4MB
+
+        // 缓存的键值对数量
+        uint32_t CACHE_SIZE = 4096;
+
+        std::string STORAGE_DIR = "./storage";
+
+        // 表示当前L1SST的序号。 L1SST的命名类似level_1_sst_0.sst, level_1_sst_1.sst, ....
+        // 开始的时候需要扫描 STORAGE_DIR 目录,找到下一个sst的LISST_NUM
+        uint32_t LISST_NUM = 0;
+    };
+
+    inline Options MakeOptionsForDebugging() {
+        return Options{};
+    }
+
+    inline Options MakeOptionsForProduction() {
+        return Options{}; // placeholder defaults; an empty body here flowed off the end of a value-returning function (UB)
+    }
+
+    // 读时候的配置信息
+    struct ReadOptions {
+        // 扩展性备用接口。
+    };
+
+    //写时候的配置信息
+    struct WriteOptions {
+        /*
+         * 注:C库缓冲 --fflush--> 内核缓冲 --fsync--> 磁盘
+         * 解释:
+         * 1. fsync系统调用可以强制每次写入都被更新到磁盘中,在open()中添加O_SYNC也由此效果;
+         * 2. 
fflush是一个在C语言标准输入输出库中的函数,功能是冲洗流中的信息,该函数通常用于
+         * 处理磁盘文件。fflush()会强迫将缓冲区内的数据写回参数stream 指定的文件中。
+         * 一般地,fsync也不能保证100%安全,因为现在的磁盘也有缓存(比如固态硬盘可能有外置DRAM缓存),
+         * 如果断电数据也可能会丢失。但是企业级硬盘一般有备用电源,并且很多固态的缓存是用的SLC颗粒(断电不丢失),
+         * 所以基本可以认为fsync可以保证数据安全。
+         *
+         * */
+        // 此处的flush和fflush语义相同,实际上flush不需要设置为true,因为WAL已经保证了数据安全(fsync)。
+        // todo: Flush这个开关暂时无效,后续有空实现
+        bool Flush = false;
+    };
+}
+#endif //SMALLKV_OPTIONS_H
diff --git a/src/db/status.h b/src/db/status.h
index 73810e8..135c325 100644
--- a/src/db/status.h
+++ b/src/db/status.h
@@ -24,6 +24,7 @@ namespace smallkv {
         static constexpr DBStatus Success = {1, "Success."};
         static constexpr DBStatus InvalidArgs = {2, "Invalid args."};
         static constexpr DBStatus ExecFailed = {3, "Exec failed."};
+        static constexpr DBStatus NotImpl = {4, "Not implemented."};
     };
 }
 
diff --git a/src/memtable/memtable.cpp b/src/memtable/memtable.cpp
index 362bdb2..46beb1b 100644
--- a/src/memtable/memtable.cpp
+++ b/src/memtable/memtable.cpp
@@ -3,8 +3,12 @@
 //
 #include "skiplist.h"
 #include "memtable.h"
+
+#include
 #include "utils/codec.h"
 #include "log/log.h"
+#include "table/sstable_builder.h"
+#include "memtable_iterator.h"
 
 namespace smallkv {
     MemTable::MemTable(std::shared_ptr alloc) : alloc(std::move(alloc)) {
@@ -43,4 +47,31 @@ namespace smallkv {
     std::optional MemTable::Get(const std::string_view &key) {
         return ordered_table_->Get(key.data());
     }
+
+    void MemTable::ConvertToL1SST(const std::string &sst_filepath,
+                                  std::shared_ptr sstable_builder) {
+        // todo: 这里可能需要加锁。
+        const std::unique_ptr<MemTableIterator> iter(NewIter()); // NewIter() hands back an owning raw pointer; free it on scope exit
+        iter->MoveToFirst(); // 指向表头
+        while (iter->Valid()) {
+            sstable_builder->add(iter->key(), iter->value());
+            iter->Next();
+        }
+        logger->info("The L1 SST file is built.");
+
+        // todo:后续需要改为异步落盘
+        sstable_builder->finish_sst(); // sst文件写到磁盘
+    }
+
+    MemTableIterator *MemTable::NewIter() {
+        return new MemTableIterator(this->ordered_table_.get());
+    }
+
+    int64_t MemTable::GetMemUsage() {
+        return ordered_table_->GetMemUsage();
+    }
+
+    int64_t
MemTable::GetSize() { + return ordered_table_->GetSize(); + } } \ No newline at end of file diff --git a/src/memtable/memtable.h b/src/memtable/memtable.h index 32a492f..d571aef 100644 --- a/src/memtable/memtable.h +++ b/src/memtable/memtable.h @@ -16,6 +16,10 @@ namespace smallkv { class FreeListAllocate; + class SSTableBuilder; + + class MemTableIterator; + /* * Insert逻辑: * 1. Add key, OpType=kAdd @@ -55,11 +59,25 @@ namespace smallkv { this->Insert(OpType::kDeletion, key, ""); } + // 获得memtable底层的跳表的内存占用 + int64_t GetMemUsage(); + + // 获得memtable底层的跳表的key数量 + int64_t GetSize(); + bool Contains(const std::string_view &key); // 如果不存在则返回nullopt std::optional Get(const std::string_view &key); + // 将内存中的memtable转为磁盘中的l1 sst + // sst_filepath格式为"/a/b/c.sst" + void ConvertToL1SST(const std::string &sst_filepath, + std::shared_ptr sstable_builder); + + // 外部调用,创建一个MemIter,来遍历MemTable底层的跳表,本质上有跳表中的Iter提供支持 + MemTableIterator *NewIter(); + private: // Add、Update、Delete都属于Insert // 如果是Delete,则value="" @@ -67,6 +85,9 @@ namespace smallkv { void Insert(OpType op_type, const std::string_view &key, const std::string_view &value); + // 在leveldb中学到的设计模式:声明一个友元迭代器,然后提供一个NewIter的public方法给外部创建迭代器 + friend class MemTableIterator; + private: std::shared_ptr> ordered_table_; std::shared_ptr alloc; diff --git a/src/memtable/memtable_iterator.cpp b/src/memtable/memtable_iterator.cpp new file mode 100644 index 0000000..3ce90b8 --- /dev/null +++ b/src/memtable/memtable_iterator.cpp @@ -0,0 +1,23 @@ +// +// Created by qianyy on 2023/1/29. 
+// + +#include "memtable_iterator.h" + +namespace smallkv { + MemTableIterator::MemTableIterator(SkipList *list) { + iter_ = std::make_shared(list); + } + + void MemTableIterator::MoveToFirst() { iter_->MoveToFirst(); } + + void MemTableIterator::Next() { iter_->Next(); } + + const std::string &MemTableIterator::key() { return iter_->key(); } + + const std::string &MemTableIterator::value() { return iter_->value(); } + + // 判断当前iter指向的位置是否有效 + bool MemTableIterator::Valid() { return iter_->Valid(); } + +} \ No newline at end of file diff --git a/src/memtable/memtable_iterator.h b/src/memtable/memtable_iterator.h new file mode 100644 index 0000000..4e4e65a --- /dev/null +++ b/src/memtable/memtable_iterator.h @@ -0,0 +1,35 @@ +// +// Created by qianyy on 2023/1/29. +// +#include "skiplist.h" +#include "memtable.h" +#include "table/sstable_builder.h" + +#ifndef SMALLKV_MEMTABLE_ITERATOR_H +#define SMALLKV_MEMTABLE_ITERATOR_H +namespace smallkv { + // 主要用于迭代遍历MemTable + class MemTableIterator final { + private: + using SKIter = SkipList::SkipListIterator; + + std::shared_ptr iter_; + + public: + explicit MemTableIterator(SkipList *list); + + // 将当前node移到表头 + // 必须要先调用此函数才可以进行迭代 + void MoveToFirst(); + + void Next(); + + const std::string &key(); + + const std::string &value(); + + // 判断当前iter指向的位置是否有效 + bool Valid(); + }; +} +#endif //SMALLKV_MEMTABLE_ITERATOR_H diff --git a/src/memtable/skiplist.h b/src/memtable/skiplist.h index c1911f8..b61270f 100644 --- a/src/memtable/skiplist.h +++ b/src/memtable/skiplist.h @@ -53,6 +53,34 @@ namespace smallkv { inline int GetSize() { return size; } + inline int64_t GetMemUsage() { return mem_usage; } + + // 迭代skiplist,主要是给MemTable中的MemeIterator调用 + class SkipListIterator { + public: + explicit SkipListIterator(const SkipList *list); + + // 如果当前iter指向的位置有效,则返回true + bool Valid(); + + const Key &key(); + + const Value &value(); + + void Next(); + + // todo: Prev暂时不支持,需要修改底层的跳变api,后续有空再说 + void Prev() = delete; + + // 将当前node移到表头 + 
// 必须要先调用此函数才可以进行迭代 + void MoveToFirst(); + + private: + const SkipList *list_; + Node *node; // 当前iter指向的节点 + }; + private: int RandomLevel(); @@ -68,12 +96,46 @@ namespace smallkv { std::shared_ptr alloc; - int max_level; // 当前表的最大高度节点 - int64_t size = 0; //表中数据量 + int max_level; // 当前表的最大高度节点 + int64_t size = 0; // 表中数据量(kv键值对数量) + int64_t mem_usage = 0; // kv键值对所占用的内存大小,单位:Byte std::shared_ptr logger = log::get_logger(); }; + template + void SkipList::SkipListIterator::MoveToFirst() { + node = list_->head_->next[0]; + } + + template + void SkipList::SkipListIterator::Next() { + assert(Valid()); + node = node->next[0]; // 遍历肯定是在跳表最底层进行遍历,所以是0 + } + + template + const Key &SkipList::SkipListIterator::key() { + assert(Valid()); + return node->key; + } + + template + const Value &SkipList::SkipListIterator::value() { + assert(Valid()); + return node->value; + } + + template + bool SkipList::SkipListIterator::Valid() { + return node != nullptr; + } + + template + SkipList::SkipListIterator::SkipListIterator(const SkipList *list) : list_(list) { + node = nullptr; + } + template std::optional SkipList::Get(const Key &key) { int level = GetCurrentHeight() - 1; @@ -115,7 +177,7 @@ namespace smallkv { // todo: 这里可以优化为 std::vector prev(GetCurrentHeight, nullptr); // 可以减少一定的计算量,后期优化性能时考虑 std::vector prev(SkipListConfig::kMaxHeight, nullptr); -// FindPrevNode(key, prev); + int level = GetCurrentHeight() - 1; auto cur = head_; int level_of_target_node = -1;// 目标节点的层数 @@ -126,7 +188,6 @@ namespace smallkv { logger->error("A error point."); break; // 遍历完成. 
实际上这个分支不可能到达 } else { -// prev[level] = cur; --level; } } else { @@ -148,9 +209,11 @@ namespace smallkv { } } } -// assert(level_of_target_node > 0); -// assert(level_of_target_node <= prev.size()); -// logger->info("level_of_target_node={}", level_of_target_node); + + // 更新内存占用 + mem_usage -= key.size(); + mem_usage -= prev[0]->next[0]->value.size(); // prev[0]->next[0]指向待删除的节点 + for (int i = 0; i < level_of_target_node; ++i) { if (prev[i] != nullptr) { assert(prev[i]->next[i] != nullptr); @@ -196,6 +259,11 @@ namespace smallkv { ++size; // 更新size + // todo:这种写法导致了Key、Value必须为string、string_view类型, + // 模板名存实亡,后续需要改进。 + mem_usage += key.size(); + mem_usage += value.size(); + // todo: 这里可以优化为 std::vector prev(GetCurrentHeight, nullptr); // 可以减少一定的计算量,后期优化性能时考虑 std::vector prev(SkipListConfig::kMaxHeight, nullptr); diff --git a/src/table/sstable_builder.cpp b/src/table/sstable_builder.cpp index a9d970d..86d057c 100644 --- a/src/table/sstable_builder.cpp +++ b/src/table/sstable_builder.cpp @@ -35,7 +35,6 @@ namespace smallkv { //写入dataBlock dataBlockBuilder->add(key, val); ++key_count; - pre_key = key; // 如果DataBlockBuilder大小超过限制,则应该把DataBlockBuilder落盘,然后清空DataBlockBuilder if (dataBlockBuilder->size() > SSTConfigInfo::MAX_DATA_BLOCK_SIZE) { // 当add_restart_points函数被调用完成的时候,表明当前DataBlock @@ -62,6 +61,7 @@ namespace smallkv { // 持久化完成后,清空当前dataBlockBuilder dataBlockBuilder->clear(); } + pre_key = key; return Status::Success; } diff --git a/src/utils/codec.h b/src/utils/codec.h index 684decc..241a7c8 100644 --- a/src/utils/codec.h +++ b/src/utils/codec.h @@ -38,5 +38,9 @@ namespace smallkv::utils { dst.append(buf, sizeof(val)); } + // 构建形如"level_n_sst_i.sst"的文件名,其中n是level层数,i是该层的第i个sst文件 + inline std::string BuildSSTPath(uint32_t n, uint32_t i) { + return "level_" + std::to_string(n) + "_sst_" + std::to_string(i) + ".sst"; + } } #endif //SMALLKV_CODEC_H diff --git a/tests/test_db.cpp b/tests/test_db.cpp new file mode 100644 index 0000000..ab21e91 --- /dev/null +++ 
b/tests/test_db.cpp @@ -0,0 +1,43 @@ +// +// Created by qianyy on 2023/1/29. +// +#include +#include +#include +#include "db/options.h" +#include "db/db.h" +#include "db/db_impl.h" + +namespace smallkv::unittest { + TEST(DB, Put_Get) { + auto logger = log::get_logger(); + auto test_options = MakeOptionsForDebugging(); + auto db_holder = std::make_unique(test_options); + WriteOptions wOp; + ReadOptions rOp; + // 生成测试数据 + const int N = 1000; + std::vector data_key, data_val; + for (int i = 0; i < N; ++i) { + data_key.push_back("key_" + std::to_string(i)); + data_val.push_back("val_" + std::to_string(i)); + } + std::sort(data_key.begin(), data_key.end()); + std::sort(data_val.begin(), data_val.end()); + + // 插入数据 + for (int i = 0; i < N; ++i) { + db_holder->Put(wOp, data_key[i], data_val[i]); + } + + // 检查数据 + std::string *value = new std::string(); + for (int i = 0; i < N; ++i) { + EXPECT_EQ(db_holder->Get(rOp, data_key[i], value), Status::Success); + EXPECT_EQ(*value, data_val[i]); + value->clear(); + } + + db_holder->Close(); + } +} \ No newline at end of file diff --git a/tests/test_memtable.cpp b/tests/test_memtable.cpp index 6766d20..7fd792a 100644 --- a/tests/test_memtable.cpp +++ b/tests/test_memtable.cpp @@ -7,6 +7,7 @@ #include #include #include "memtable/memtable.h" +#include "memtable/memtable_iterator.h" #include "memory/allocate.h" namespace smallkv::unittest { @@ -77,4 +78,34 @@ namespace smallkv::unittest { } } } + + TEST(MemTable, MemTableIterator) { + auto alloc = std::make_shared(); + auto mem_table = std::make_shared(alloc); + + const int N = 1000; + // 构建插入数据 + std::vector data_key, data_value; + for (int i = 0; i < N; ++i) { + data_key.emplace_back("key_" + std::to_string(i)); + data_value.emplace_back("value_" + std::to_string(i)); + } + + std::sort(data_key.begin(), data_key.end()); + std::sort(data_value.begin(), data_value.end()); + + // 插入 + for (int i = 0; i < N; ++i) { + mem_table->Add(data_key[i], data_value[i]); + } + + auto iter = 
mem_table->NewIter(); + iter->MoveToFirst(); + // 测试迭代器 + for (int i = 0; i < N; ++i) { + EXPECT_EQ(iter->key(), data_key[i]); + EXPECT_EQ(iter->value(), data_value[i]); + iter->Next(); + } + } } diff --git a/tests/test_skiplist.cpp b/tests/test_skiplist.cpp index 2da76a5..d777055 100644 --- a/tests/test_skiplist.cpp +++ b/tests/test_skiplist.cpp @@ -145,4 +145,38 @@ namespace smallkv::unittest { } } } + + + TEST(skiplist, GetMemUsage_and_GetSize) { + auto alloc = std::make_shared(); + std::shared_ptr> skiplist = + std::make_shared>(alloc); + + EXPECT_EQ(skiplist->GetSize(), 0); + EXPECT_EQ(skiplist->GetMemUsage(), 0); + + skiplist->Insert("1", "value_1"); + EXPECT_EQ(skiplist->GetSize(), 1); + EXPECT_EQ(skiplist->GetMemUsage(), 8); + + skiplist->Insert("3", "value_3"); + EXPECT_EQ(skiplist->GetSize(), 2); + EXPECT_EQ(skiplist->GetMemUsage(), 16); + + skiplist->Insert("5", "value_5"); + EXPECT_EQ(skiplist->GetSize(), 3); + EXPECT_EQ(skiplist->GetMemUsage(), 24); + + skiplist->Delete("1"); + EXPECT_EQ(skiplist->GetSize(), 2); + EXPECT_EQ(skiplist->GetMemUsage(), 16); + + skiplist->Delete("3"); + EXPECT_EQ(skiplist->GetSize(), 1); + EXPECT_EQ(skiplist->GetMemUsage(), 8); + + skiplist->Delete("5"); + EXPECT_EQ(skiplist->GetSize(), 0); + EXPECT_EQ(skiplist->GetMemUsage(), 0); + } }