diff --git a/.gitignore b/.gitignore
index a9c1fac..16179e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,7 @@ test_temp
# others
ref
-build/*
\ No newline at end of file
+build/*
+
+storage
+db_storage
\ No newline at end of file
diff --git a/README.md b/README.md
index c8ec55f..76f3cce 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ smallkv 是一个列存的、基于LSM架构的存储引擎。
**项目正在疯狂迭代中!!**
---
+
## 进度
- [x] 跳表
@@ -27,13 +28,18 @@ smallkv 是一个列存的、基于LSM架构的存储引擎。
- [ ] 读流程
- [ ] 写流程
- [ ] Compaction模块
+- [ ] 用FreeListAllocate(src/memory/allocate.h)替换系统内存分配器
---
+
## BUILD
-You must use the g++ compiler and Ubuntu 22.04 system.
+You must use a g++ compiler with C++17 support, on Ubuntu 22.04.
+
### build from docker (Highly recommended)
+
```shell
+git clone git@github.com:yangyang233333/smallkv.git
docker pull qianyy2333/smallkv-test
docker run -it -v /{smallkv代码所在的目录}:/test qianyy2333/smallkv-test /bin/bash
./build.sh ## 编译
@@ -42,6 +48,7 @@ docker run -it -v /{smallkv代码所在的目录}:/test qianyy2333/smallkv-test
```
### build from source code:
+
```shell
# 安装依赖
apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \
@@ -50,40 +57,60 @@ apt update && apt upgrade -y && apt install cmake make git g++ gcc -y && cd ~ \
&& git clone https://github.com/nlohmann/json && cd json && mkdir build && cd build && cmake .. && make -j && sudo make install && cd ~ \
&& git clone https://github.com/abseil/abseil-cpp.git && cd abseil-cpp && mkdir build && cd build && cmake .. && make -j && make install && cd ~ \
&& rm -rf spdlog googletest json
+git clone git@github.com:yangyang233333/smallkv.git
+cd smallkv
./build.sh ## 编译
./main_run.sh ## 主程序
./unittest_run.sh ## 单元测试
```
---
+
## 设计
+
### 1. **内存池设计**
+

### 2. **缓存设计**
+

-Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。
+Cache中持有N(默认为5)个指向CachePolicy的指针,相当于5个分片,可以减少哈希冲突以及减少锁的范围;LRUCache和LFUCache都是CachePolicy的子类。
+
+### 3. **SSTable设计**
-### 3. **SSTable设计**
每个.sst文件存储一个SSTable结构,SSTable结构如下所示:

-下面细说每个模块的内容:
-- #### 3.1 DataBlock
+下面细说每个模块的内容:
+
+- #### 3.1 DataBlock
+

- 1)上图中,每个Record存储了具体的KV数据,并且记录了连续的Key的共享长度(为了差值压缩);
- 2)Restart主要用来进行二分查找,根据Restart中记录的offset信息可以解析出对应的Record Group中最小的Key,通过比对连续的Restart中的Key可以快速定位K-V pair,每个Restart记录了一个Record Group中的Record数量,以及对应的size和offset,每个Restart长度为12字节;
- 3)Restart_NUM记录了Restart的数量;
- 4)Restart_Offset记录了Restart的size和offset信息;
-- #### 3.2 MetaBlock
-MetaBlock中存储了Filter信息(位数组和哈希函数个数),也就是布隆过滤器的数据。为什么需要这个数据?因为sst是顺序append结构,所以写入很快(O(1)),但是查找非常慢(O(N)),于是需要一个布隆过滤器来对请求进行初步的过滤(可以过滤掉一定不存在的KV pair)。
-- #### 3.3 IndexBlock
+1)上图中,每个Record存储了具体的KV数据,并且记录了连续的Key的共享长度(为了差值压缩);
+2)Restart主要用来进行二分查找,根据Restart中记录的offset信息可以解析出对应的Record
+Group中最小的Key,通过比对连续的Restart中的Key可以快速定位K-V pair,每个Restart记录了一个Record
+Group中的Record数量,以及对应的size和offset,每个Restart长度为12字节;
+3)Restart_NUM记录了Restart的数量;
+4)Restart_Offset记录了Restart的size和offset信息;
+
+- #### 3.2 MetaBlock
+
+MetaBlock中存储了Filter信息(位数组和哈希函数个数),也就是布隆过滤器的数据。为什么需要这个数据?因为sst是顺序append结构,所以写入很快(O(1)),
+但是查找非常慢(O(N)),于是需要一个布隆过滤器来对请求进行初步的过滤(可以过滤掉一定不存在的KV pair)。
+
+- #### 3.3 IndexBlock
+

-IndexBlock存储对应的DataBlock中的最大key信息(注意:实际存储的是shortest_key,并且shortest_key = min{shortest_key > 对应的DataBlock的最大key},这样可以减小比较次数,缓解高并发下的压力);Offset_Info存储了对应DataBlock的size和offset。
-- #### 3.4 Footer
+IndexBlock存储对应的DataBlock中的最大key信息(注意:实际存储的是shortest_key,并且shortest_key = min{shortest_key >
+对应的DataBlock的最大key},这样可以减小比较次数,缓解高并发下的压力);Offset_Info存储了对应DataBlock的size和offset。
+
+- #### 3.4 Footer
+

MetaBlock_OffsetInfo记录了MetaBlock的size和offset,IndexBlock_OffsetInfo记录了IndexBlock的offset(第一个IndexBlock的offset)和size(所有IndexBlock的总大小)。
---
+
## 第三方依赖:
1. [spdlog](https://github.com/gabime/spdlog)
@@ -92,16 +119,19 @@ MetaBlock_OffsetInfo记录了MetaBlock的size和offset,IndexBlock_OffsetInfo
4. [abseil](https://github.com/abseil/abseil-cpp)
---
-## 参考:
+
+## 有用的参考资料:
1. [阿里云NewSQL数据库大赛](https://tianchi.aliyun.com/competition/entrance/531980/introduction)
2. [corekv](https://github.com/hardcore-os/coreKV-CPP)
3. [leveldb](https://github.com/google/leveldb)
4. [LSM树原理](https://zhuanlan.zhihu.com/p/181498475)
5. [LSM Tree是什么?](https://www.zhihu.com/question/446544471/answer/2348883977)
-6. [WAL](https://zhuanlan.zhihu.com/p/258091002)
+6. [WAL](https://zhuanlan.zhihu.com/p/258091002)
+7. [Linux I/O: fsync, fflush, fwrite, mmap](https://juejin.cn/post/7001665675907301412)
---
感谢 [JetBrains](https://jb.gg/OpenSourceSupport) 捐献的免费许可证帮助我们开发smallkv。
-Thanks to [JetBrains](https://jb.gg/OpenSourceSupport) for donating product licenses to help develop **smallkv**
+Thanks to [JetBrains](https://jb.gg/OpenSourceSupport) for donating product licenses to help develop
+**smallkv**.
diff --git a/img/linux_io.png b/img/linux_io.png
new file mode 100644
index 0000000..dc4bb2f
Binary files /dev/null and b/img/linux_io.png differ
diff --git a/src/cache/cache.h b/src/cache/cache.h
index 91f4d6b..2c352b7 100644
--- a/src/cache/cache.h
+++ b/src/cache/cache.h
@@ -50,6 +50,12 @@ namespace smallkv {
return caches[sharding_index]->get(key);
}
+        // Reports whether `key` is present. The key is hashed to pick one of
+        // the SHARDING_NUM shards and the question is forwarded to that shard.
+        bool contains(const K &key) {
+            const uint64_t shard = hash_fn(key) % SHARDING_NUM;
+            const bool present = caches[shard]->contains(key);
+            return present;
+        }
+
// 释放节点(引用计数减一)
void release(const K &key) {
uint64_t sharding_index = hash_fn(key) % SHARDING_NUM;
diff --git a/src/cache/cache_policy.h b/src/cache/cache_policy.h
index e817a62..a4594a9 100644
--- a/src/cache/cache_policy.h
+++ b/src/cache/cache_policy.h
@@ -25,6 +25,9 @@ namespace smallkv {
// 查
virtual Node *get(const K &key) = 0;
+ // Returns true if `key` is present in the cache.
+ virtual bool contains(const K &key) = 0;
+
// 释放节点(引用计数减一)
virtual void release(const K &key) = 0;
diff --git a/src/cache/lru.h b/src/cache/lru.h
index 550bf51..fb647ed 100644
--- a/src/cache/lru.h
+++ b/src/cache/lru.h
@@ -156,6 +156,12 @@ namespace smallkv {
return *(iter->second);
}
+        // Returns true if `key` is present in the index.
+        // Holds the cache lock while performing a plain lookup in `index`.
+        bool contains(const K &key) override {
+            ScopedLock lock_guard(locker);
+            return index.find(key) != index.end();
+        }
+
// 释放节点(引用计数减一)
void release(const K &key) override {
ScopedLock lock_guard(locker);
diff --git a/src/db/db.cpp b/src/db/db.cpp
new file mode 100644
index 0000000..3a18f23
--- /dev/null
+++ b/src/db/db.cpp
@@ -0,0 +1,40 @@
+//
+// Created by qianyy on 2023/1/28.
+//
+#include "db.h"
+#include "db_impl.h"
+
+namespace smallkv {
+ // Creates the implementation object from `options` and stores it in db_impl.
+ DB::DB(const Options &options) {
+ db_impl = std::make_unique(options);
+ }
+
+    // Stores `value` under `key`; the work is done by the implementation object.
+    DBStatus DB::Put(const WriteOptions &options,
+                     const std::string_view &key,
+                     const std::string_view &value) {
+        auto &impl = *db_impl;
+        return impl.Put(options, key, value);
+    }
+
+    // Removes `key`; the work is done by the implementation object.
+    DBStatus DB::Delete(const WriteOptions &options,
+                        const std::string_view &key) {
+        auto &impl = *db_impl;
+        return impl.Delete(options, key);
+    }
+
+    // Looks up `key` and writes the result through `value`; the work is done
+    // by the implementation object.
+    DBStatus DB::Get(const ReadOptions &options,
+                     const std::string_view &key,
+                     std::string *value) {
+        auto &impl = *db_impl;
+        return impl.Get(options, key, value);
+    }
+
+    // Batch write, forwarded to the implementation object.
+    DBStatus DB::BatchPut(const WriteOptions &options) {
+        auto &impl = *db_impl;
+        return impl.BatchPut(options);
+    }
+
+    // Batch delete, forwarded to the implementation object.
+    DBStatus DB::BatchDelete(const ReadOptions &options) {
+        auto &impl = *db_impl;
+        return impl.BatchDelete(options);
+    }
+
+    // Closes the database by forwarding to the implementation object.
+    DBStatus DB::Close() {
+        auto &impl = *db_impl;
+        return impl.Close();
+    }
+}
diff --git a/src/db/db.h b/src/db/db.h
new file mode 100644
index 0000000..0b77838
--- /dev/null
+++ b/src/db/db.h
@@ -0,0 +1,49 @@
+//
+// Created by qianyy on 2023/1/27.
+//
+#include
+#include
+#include "status.h"
+#include "options.h"
+
+#ifndef SMALLKV_DB_H
+#define SMALLKV_DB_H
+namespace smallkv {
+ class DBImpl;
+
+ // Public database handle. It owns an implementation object (db_impl); only a
+ // forward declaration of DBImpl is visible in this header.
+ class DB {
+ public:
+ // Builds the database from `options`.
+ explicit DB(const Options& options);
+
+ // NOTE(review): defaulted here while DBImpl is only forward-declared;
+ // confirm every translation unit that destroys a DB sees the full type.
+ ~DB() = default;
+
+ // A DB is meant to be a singleton, so copy and assignment are disabled.
+ DB(const DB &) = delete;
+
+ DB &operator=(const DB &) = delete;
+
+ // Stores `value` under `key`.
+ DBStatus Put(const WriteOptions &options,
+ const std::string_view &key,
+ const std::string_view &value);
+
+ // Removes `key`.
+ DBStatus Delete(const WriteOptions &options,
+ const std::string_view &key);
+
+ // Writes the value stored for `key` into the string pointed to by `value`.
+ DBStatus Get(const ReadOptions &options,
+ const std::string_view &key,
+ std::string *value);
+
+ // Batch write.
+ DBStatus BatchPut(const WriteOptions &options);
+
+ // Batch delete.
+ // NOTE(review): takes ReadOptions although this is a write-side
+ // operation — confirm whether WriteOptions was intended.
+ DBStatus BatchDelete(const ReadOptions &options);
+
+ // Closes the database: calling this guarantees that all data written so
+ // far is persisted to disk.
+ DBStatus Close();
+
+ private:
+ std::unique_ptr db_impl;
+ };
+}
+#endif //SMALLKV_DB_H
diff --git a/src/db/db_impl.cpp b/src/db/db_impl.cpp
new file mode 100644
index 0000000..7bb63bf
--- /dev/null
+++ b/src/db/db_impl.cpp
@@ -0,0 +1,204 @@
+//
+// Created by qianyy on 2023/1/28.
+//
+
+#include
+
+#include "db_impl.h"
+#include "cache/cache.h"
+#include "utils/codec.h"
+#include "memory/allocate.h"
+#include "memtable/memtable.h"
+#include "wal/wal_writer.h"
+#include "file/file_writer.h"
+#include "table/sstable_builder.h"
+
+namespace smallkv {
+ // Takes `options` by value and creates the collaborators used by the
+ // read/write paths: allocator, active memtable, logger, WAL writer and cache.
+ DBImpl::DBImpl(Options options) : options_(std::move(options)) {
+ alloc = std::make_shared();
+ mem_table = std::make_shared(alloc);
+ logger = log::get_logger();
+ // The WAL writer is built on a file writer created from options_.DB_DIR.
+ auto file_writer_ = std::make_shared(options_.DB_DIR);
+ wal_writer = std::make_shared(file_writer_);
+
+ // The cache is sized from options_.CACHE_SIZE.
+ cache = std::make_shared>(options_.CACHE_SIZE);
+ // Cached values are passed in as raw std::string pointers, so the
+ // registered clean-up handle deletes them.
+ cache->register_clean_handle([](const std::string &key, std::string *val) {
+ delete val;
+ });
+ }
+
+    // Inserts `key` with `value`, or updates it if it already exists.
+    //
+    // Steps, all performed under the exclusive lock:
+    //   1. append the encoded record to the WAL;
+    //   2. write the active memtable (Update if the key exists, Add otherwise);
+    //   3. mirror the pair into the cache to speed up later reads;
+    //   4. if the memtable reached options_.MEM_TABLE_MAX_SIZE, flush it to an
+    //      SST file and start a fresh memtable.
+    //
+    // `options` is currently unused. Always returns Status::Success.
+    DBStatus DBImpl::Put(const WriteOptions &options,
+                         const std::string_view &key,
+                         const std::string_view &value) {
+        std::unique_lock wlock(rwlock_);
+        assert(closed == false);
+
+        // 1. WAL. The record holds binary length fields, so it is kept in a
+        //    std::string that carries its own length. A raw char array is not
+        //    NUL-terminated, and a run-time sized array is not standard C++.
+        //    NOTE(review): assumes AddLog accepts std::string / std::string_view.
+        std::string record(8 + key.size() + value.size(), '\0');
+        EncodeKV(key, value, record.data());
+        wal_writer->AddLog(record);
+
+        // 2. Memtable.
+        if (mem_table->Contains(key)) {  // update
+            mem_table->Update(key, value);
+        } else {                         // new insert
+            mem_table->Add(key, value);
+        }
+
+        // 3. Cache.
+        // todo: not every write needs to reach the cache; a dynamic,
+        //       hot-spot-aware policy would be better for bulk loads.
+        // todo: allocating a std::string per write is slow; the cache
+        //       interface should be reworked.
+        // string_view::data() is not guaranteed to be NUL-terminated, so the
+        // key and value are copied with their explicit lengths.
+        cache->insert(std::string(key), new std::string(value));
+
+        // 4. Flush the memtable to an SST once it reaches the limit.
+        if (mem_table->GetMemUsage() >= options_.MEM_TABLE_MAX_SIZE) {
+            MemTableToSST();
+
+            // Start a fresh writable memtable.
+            mem_table = std::make_shared<MemTable>(alloc);
+            logger->info("[DBImpl::Put] A new mem_table is created.");
+        }
+        return Status::Success;
+    }
+
+    // Deletes `key`.
+    //
+    // Steps, all performed under the exclusive lock:
+    //   1. append a record with an empty value to the WAL (val_len == 0 marks
+    //      a deletion);
+    //   2. mark the key deleted in the memtable, or add a tombstone (empty
+    //      value) when the key is not in the active memtable;
+    //   3. drop the key from the cache;
+    //   4. if the memtable reached options_.MEM_TABLE_MAX_SIZE, flush it to an
+    //      SST file and start a fresh memtable.
+    //
+    // `options` is currently unused. Always returns Status::Success.
+    DBStatus DBImpl::Delete(const WriteOptions &options,
+                            const std::string_view &key) {
+        std::unique_lock wlock(rwlock_);
+        assert(closed == false);
+
+        // 1. WAL. The record is binary, so it is kept in a std::string that
+        //    carries its own length rather than in a non-terminated,
+        //    run-time sized char array.
+        //    NOTE(review): assumes AddLog accepts std::string / std::string_view.
+        std::string record(8 + key.size(), '\0');
+        EncodeKV(key, "", record.data());
+        wal_writer->AddLog(record);
+
+        // 2. Memtable.
+        if (mem_table->Contains(key)) {  // mark in place
+            mem_table->Delete(key);
+        } else {
+            mem_table->Add(key, "");     // tombstone
+        }
+
+        // 3. Cache. string_view::data() is not guaranteed to be
+        //    NUL-terminated, so the key is copied with its explicit length.
+        cache->erase(std::string(key));
+
+        // 4. Flush the memtable to an SST once it reaches the limit.
+        if (mem_table->GetMemUsage() >= options_.MEM_TABLE_MAX_SIZE) {
+            MemTableToSST();
+
+            // Start a fresh writable memtable.
+            mem_table = std::make_shared<MemTable>(alloc);
+            logger->info("[DBImpl::Delete] A new mem_table is created.");
+        }
+        return Status::Success;
+    }
+
+    // Looks up `key` and, on success, writes its value into `*value`.
+    //
+    // Lookup order (under the shared lock):
+    //   1. the cache;
+    //   2. the active memtable;
+    //   3. the SST files (todo: not implemented yet);
+    //   4. write data found in step 3 back into the cache (todo).
+    //
+    // Returns Status::InvalidArgs when `value` is null, Status::Success when
+    // the key was found, and Status::ExecFailed when it was found nowhere.
+    // No dedicated "not found" status is visible in this change; the previous
+    // code reported Success for a miss and left `*value` untouched.
+    // `options` is currently unused.
+    DBStatus DBImpl::Get(const ReadOptions &options,
+                         const std::string_view &key,
+                         std::string *value) {
+        if (value == nullptr) {
+            return Status::InvalidArgs;
+        }
+        std::shared_lock rlock(rwlock_);
+        assert(closed == false);
+
+        // string_view::data() is not guaranteed to be NUL-terminated, so the
+        // cache key is built once with the explicit length.
+        const std::string cache_key(key);
+
+        // 1. Cache.
+        // NOTE(review): Cache::release is documented as decrementing a
+        // reference count; confirm whether the node returned by get() must be
+        // released here.
+        if (cache->contains(cache_key)) {
+            *value = *(cache->get(cache_key)->val);
+            return Status::Success;
+        }
+
+        // 2. Active memtable (one lookup instead of two).
+        // NOTE(review): Delete stores an empty value as a tombstone, so a
+        // deleted key is still reported here as found with an empty value.
+        if (mem_table->Contains(key)) {
+            auto found = mem_table->Get(key);
+            if (found.has_value()) {
+                *value = std::move(*found);
+                return Status::Success;
+            }
+        }
+
+        // 3. SST files.
+        // todo: implement later
+
+        // 4. Write data found on disk back into the cache.
+        // todo
+
+        return Status::ExecFailed;
+    }
+
+    // Batch write. Not implemented yet: takes the exclusive lock, checks that
+    // the database is still open and reports Status::NotImpl.
+    DBStatus DBImpl::BatchPut(const WriteOptions &options) {
+        std::unique_lock exclusive(rwlock_);
+        assert(!closed);
+        // todo: implement later
+        return Status::NotImpl;
+    }
+
+    // Batch delete. Not implemented yet: takes the exclusive lock, checks that
+    // the database is still open and reports Status::NotImpl.
+    DBStatus DBImpl::BatchDelete(const ReadOptions &options) {
+        std::unique_lock exclusive(rwlock_);
+        assert(!closed);
+        // todo: implement later
+        return Status::NotImpl;
+    }
+
+    // Encodes one key/value pair into `buf`.
+    //
+    // Layout currently used:
+    //   +-------------+-----+-------------+-----+
+    //   | key_len(4B) | key | val_len(4B) | val |
+    //   +-------------+-----+-------------+-----+
+    // todo: room for optimisation (e.g. variable-length integers), later.
+    //
+    // `buf` must provide at least 8 + key.size() + value.size() bytes.
+    // Both lengths are stored in 32 bits, so both are checked; previously only
+    // the value length was.
+    void DBImpl::EncodeKV(const std::string_view &key,
+                          const std::string_view &value,
+                          char *buf) {
+        assert(key.size() < UINT32_MAX);
+        assert(value.size() < UINT32_MAX);
+
+        char *cursor = buf;
+        utils::EncodeFixed32(cursor, key.size());
+        cursor += 4;
+        if (!key.empty()) {  // an empty view may carry a null data pointer
+            memcpy(cursor, key.data(), key.size());
+            cursor += key.size();
+        }
+        utils::EncodeFixed32(cursor, value.size());
+        cursor += 4;
+        if (!value.empty()) {
+            memcpy(cursor, value.data(), value.size());
+        }
+    }
+
+ // Converts the active memtable into a new SST file under
+ // options_.STORAGE_DIR and advances options_.LISST_NUM for the next file.
+ // Takes no lock itself; Put and Delete call it while holding the write lock.
+ void DBImpl::MemTableToSST() {
+ // todo: synchronous for now (easier to debug); should become asynchronous later.
+
+ // The path has the form /.../level_n_sst_i.sst
+ auto sst_filepath = options_.STORAGE_DIR + "/" + utils::BuildSSTPath(0, options_.LISST_NUM);
+ logger->info("DBImpl::MemTableToSST() is called. sst_filepath={}", sst_filepath);
+
+ // The builder is created with the memtable's key count and a file writer
+ // for the target path.
+ auto file_writer = std::make_shared(sst_filepath);
+ auto sstable_builder = std::make_shared(mem_table->GetSize(), file_writer);
+ mem_table->ConvertToL1SST(sst_filepath, sstable_builder);
+
+ ++options_.LISST_NUM; // sequence number for the next sst file
+ }
+
+    // Closes the database. On the first call, any data still held in the
+    // memtable is flushed to an SST file and the database is marked closed;
+    // later calls only log. Always returns Status::Success.
+    //
+    // The `closed` flag is now set even when the memtable is empty;
+    // previously an empty database was never marked closed. The exclusive lock
+    // matches the other mutating operations.
+    DBStatus DBImpl::Close() {
+        std::unique_lock wlock(rwlock_);
+        if (!closed) {
+            if (mem_table->GetSize() > 0) {
+                // The memtable still holds data, so persist it.
+                MemTableToSST();
+            }
+            closed = true;
+        }
+        logger->info("DB is closed.");
+        return Status::Success;
+    }
+}
diff --git a/src/db/db_impl.h b/src/db/db_impl.h
new file mode 100644
index 0000000..0f6394b
--- /dev/null
+++ b/src/db/db_impl.h
@@ -0,0 +1,80 @@
+//
+// Created by qianyy on 2023/1/28.
+//
+#include
+#include
+#include
+#include "status.h"
+#include "options.h"
+#include "log/log.h"
+
+#ifndef SMALLKV_DB_IMPL_H
+#define SMALLKV_DB_IMPL_H
+
+namespace smallkv {
+ template
+ class Cache;
+
+ class MemTable;
+
+ class WALWriter;
+
+ class FreeListAllocate;
+
+ /*
+ * Implementation behind the public DB handle.
+ * Supports concurrent use; intended to be thread-safe.
+ *
+ * */
+ class DBImpl {
+ public:
+ // Builds the implementation from `options` (taken by value).
+ explicit DBImpl(Options options);
+
+ ~DBImpl() = default;
+
+ // Has both Set and Update semantics.
+ DBStatus Put(const WriteOptions &options,
+ const std::string_view &key,
+ const std::string_view &value);
+
+ // Removes `key`.
+ DBStatus Delete(const WriteOptions &options,
+ const std::string_view &key);
+
+ // Writes the value stored for `key` into the string pointed to by `value`.
+ DBStatus Get(const ReadOptions &options,
+ const std::string_view &key,
+ std::string *value);
+
+ // Closes the database: calling this guarantees that all data written so
+ // far is persisted to disk.
+ DBStatus Close();
+
+ // Batch write.
+ DBStatus BatchPut(const WriteOptions &options);
+
+ // Batch delete.
+ DBStatus BatchDelete(const ReadOptions &options);
+
+ private:
+ // Encodes a key/value pair into buf; the caller must make sure buf is
+ // 8 + key.size() + value.size() bytes long.
+ static void EncodeKV(const std::string_view &key,
+ const std::string_view &value,
+ char *buf);
+
+ // Converts the memtable into an sst.
+ void MemTableToSST();
+
+ private:
+ std::shared_ptr mem_table; // active memtable
+ std::shared_ptr logger; // logger
+ std::shared_ptr alloc; // memory allocator
+ std::shared_ptr wal_writer; // writes the WAL
+
+ std::shared_ptr> cache; // cache
+
+ Options options_; // configuration
+
+ std::shared_mutex rwlock_; // reader/writer lock
+
+ bool closed = false; // false means the database has not been closed
+ };
+}
+
+#endif //SMALLKV_DB_IMPL_H
diff --git a/src/db/options.h b/src/db/options.h
new file mode 100644
index 0000000..0c9a3a1
--- /dev/null
+++ b/src/db/options.h
@@ -0,0 +1,60 @@
+//
+// Created by qianyy on 2023/1/27.
+//
+#include
+
+#ifndef SMALLKV_OPTIONS_H
+#define SMALLKV_OPTIONS_H
+namespace smallkv {
+ // Database configuration, e.g. whether syncing is enabled, cache sizing, etc.
+ struct Options {
+ // todo: earlier settings live in the xxx_config files; they should be
+ // gathered here eventually.
+
+ // Storage directory of the database; must be customised,
+ // e.g. changed to "/home/db_storage".
+ // NOTE(review): the default is an absolute path from a developer machine.
+ std::string DB_DIR = "/mnt/c/Users/abc/Desktop/smallkv_proj/smallkv/db_storage";
+
+ // Maximum size of the memtable; once exceeded it should be flushed to disk.
+ size_t MEM_TABLE_MAX_SIZE = 4 * 1024 * 1024; // 4MB
+
+ // Number of key/value pairs held by the cache.
+ uint32_t CACHE_SIZE = 4096;
+
+ // Directory that receives the sst files.
+ std::string STORAGE_DIR = "./storage";
+
+ // Sequence number of the current L1 SST. L1 SST files are named like
+ // level_1_sst_0.sst, level_1_sst_1.sst, ....
+ // At start-up the STORAGE_DIR directory needs to be scanned to find the
+ // LISST_NUM of the next sst.
+ // NOTE(review): DBImpl::MemTableToSST builds the name with level 0
+ // (BuildSSTPath(0, ...)) — confirm which level number is intended.
+ uint32_t LISST_NUM = 0;
+ };
+
+    // Option set for tests and local debugging: every field keeps the default
+    // declared in Options.
+    inline Options MakeOptionsForDebugging() {
+        Options debug_options{};
+        return debug_options;
+    }
+
+    // Option set for production use.
+    // No production-specific tuning is defined yet, so this returns the
+    // defaults declared in Options. The previous body had no return statement,
+    // which is undefined behaviour for a function returning a value.
+    inline Options MakeOptionsForProduction() {
+        return Options{};
+    }
+
+ // Per-read configuration.
+ struct ReadOptions {
+ // Empty for now; reserved as an extension point.
+ };
+
+ // Per-write configuration.
+ struct WriteOptions {
+ /*
+ * Note: C library buffer --fflush--> kernel buffer --fsync--> disk
+ * Explanation:
+ * 1. The fsync system call forces each write to reach the disk; passing
+ * O_SYNC to open() has the same effect.
+ * 2. fflush is a function of the C standard I/O library that flushes the
+ * data buffered in a stream; it is typically used with disk files.
+ * fflush() forces the buffered data to be written to the file given
+ * by the stream argument.
+ * In general even fsync is not 100% safe, because modern disks have their
+ * own caches (an SSD may have an external DRAM cache), so data can still
+ * be lost on power failure. Enterprise disks usually have backup power,
+ * and many SSD caches use SLC cells (which keep data without power),
+ * so fsync can basically be considered to keep data safe.
+ *
+ * */
+ // Flush here has the same meaning as fflush. It does not actually need to
+ // be true, because the WAL already guarantees data safety (fsync).
+ // todo: this switch has no effect yet; to be implemented later.
+ bool Flush = false;
+ };
+}
+#endif //SMALLKV_OPTIONS_H
diff --git a/src/db/status.h b/src/db/status.h
index 73810e8..135c325 100644
--- a/src/db/status.h
+++ b/src/db/status.h
@@ -24,6 +24,7 @@ namespace smallkv {
static constexpr DBStatus Success = {1, "Success."};
static constexpr DBStatus InvalidArgs = {2, "Invalid args."};
static constexpr DBStatus ExecFailed = {3, "Exec failed."};
+ static constexpr DBStatus NotImpl = {4, "Not implemented."};
};
}
diff --git a/src/memtable/memtable.cpp b/src/memtable/memtable.cpp
index 362bdb2..46beb1b 100644
--- a/src/memtable/memtable.cpp
+++ b/src/memtable/memtable.cpp
@@ -3,8 +3,12 @@
//
#include "skiplist.h"
#include "memtable.h"
+
+#include
#include "utils/codec.h"
#include "log/log.h"
+#include "table/sstable_builder.h"
+#include "memtable_iterator.h"
namespace smallkv {
MemTable::MemTable(std::shared_ptr alloc) : alloc(std::move(alloc)) {
@@ -43,4 +47,31 @@ namespace smallkv {
std::optional MemTable::Get(const std::string_view &key) {
return ordered_table_->Get(key.data());
}
+
+    // Feeds every key/value pair of this memtable, in iteration order, into
+    // `sstable_builder` and then asks the builder to write the SST to disk.
+    // `sst_filepath` is currently unused here.
+    //
+    // The iterator returned by NewIter() is heap-allocated and owned by the
+    // caller; it is held in a unique_ptr so it is released on every exit path
+    // (it used to leak).
+    void MemTable::ConvertToL1SST(const std::string &sst_filepath,
+                                  std::shared_ptr<SSTableBuilder> sstable_builder) {
+        // todo: locking may be required here.
+        const std::unique_ptr<MemTableIterator> iter(NewIter());
+        for (iter->MoveToFirst(); iter->Valid(); iter->Next()) {
+            sstable_builder->add(iter->key(), iter->value());
+        }
+        logger->info("The L1 SST file is built.");
+
+        // todo: flushing should become asynchronous later.
+        sstable_builder->finish_sst();  // write the sst file to disk
+    }
+
+    // Allocates a new iterator over the underlying skip list with `new` and
+    // returns the raw pointer.
+    MemTableIterator *MemTable::NewIter() {
+        auto *skip_list = ordered_table_.get();
+        return new MemTableIterator(skip_list);
+    }
+
+    // Memory usage reported by the underlying skip list.
+    int64_t MemTable::GetMemUsage() {
+        const int64_t usage = ordered_table_->GetMemUsage();
+        return usage;
+    }
+
+    // Number of keys reported by the underlying skip list.
+    int64_t MemTable::GetSize() {
+        const int64_t key_count = ordered_table_->GetSize();
+        return key_count;
+    }
}
\ No newline at end of file
diff --git a/src/memtable/memtable.h b/src/memtable/memtable.h
index 32a492f..d571aef 100644
--- a/src/memtable/memtable.h
+++ b/src/memtable/memtable.h
@@ -16,6 +16,10 @@ namespace smallkv {
class FreeListAllocate;
+ class SSTableBuilder;
+
+ class MemTableIterator;
+
/*
* Insert逻辑:
* 1. Add key, OpType=kAdd
@@ -55,11 +59,25 @@ namespace smallkv {
this->Insert(OpType::kDeletion, key, "");
}
+ // Memory usage of the skip list underlying this memtable.
+ int64_t GetMemUsage();
+
+ // Number of keys in the skip list underlying this memtable.
+ int64_t GetSize();
+
bool Contains(const std::string_view &key);
// 如果不存在则返回nullopt
std::optional Get(const std::string_view &key);
+ // Converts the in-memory memtable into an L1 sst on disk.
+ // sst_filepath has the form "/a/b/c.sst".
+ void ConvertToL1SST(const std::string &sst_filepath,
+ std::shared_ptr sstable_builder);
+
+ // Called by external code to create an iterator over the skip list
+ // underlying this MemTable; the skip list's own iterator does the work.
+ MemTableIterator *NewIter();
+
private:
// Add、Update、Delete都属于Insert
// 如果是Delete,则value=""
@@ -67,6 +85,9 @@ namespace smallkv {
void Insert(OpType op_type, const std::string_view &key,
const std::string_view &value);
+ // Design pattern borrowed from leveldb: declare the iterator as a friend and
+ // expose a public NewIter method through which callers create iterators.
+ friend class MemTableIterator;
+
private:
std::shared_ptr> ordered_table_;
std::shared_ptr alloc;
diff --git a/src/memtable/memtable_iterator.cpp b/src/memtable/memtable_iterator.cpp
new file mode 100644
index 0000000..3ce90b8
--- /dev/null
+++ b/src/memtable/memtable_iterator.cpp
@@ -0,0 +1,23 @@
+//
+// Created by qianyy on 2023/1/29.
+//
+
+#include "memtable_iterator.h"
+
+namespace smallkv {
+ // Wraps `list` in a skip-list iterator stored in iter_.
+ // The iterator is not positioned yet; see MoveToFirst().
+ MemTableIterator::MemTableIterator(SkipList *list) {
+ iter_ = std::make_shared(list);
+ }
+
+    // Positions the iterator at the start of the underlying skip list.
+    void MemTableIterator::MoveToFirst() {
+        iter_->MoveToFirst();
+    }
+
+    // Advances to the next entry.
+    void MemTableIterator::Next() {
+        iter_->Next();
+    }
+
+    // Key of the entry the iterator currently points at.
+    const std::string &MemTableIterator::key() {
+        const std::string &current_key = iter_->key();
+        return current_key;
+    }
+
+    // Value of the entry the iterator currently points at.
+    const std::string &MemTableIterator::value() {
+        const std::string &current_value = iter_->value();
+        return current_value;
+    }
+
+    // True while the iterator points at a valid position.
+    bool MemTableIterator::Valid() {
+        const bool is_valid = iter_->Valid();
+        return is_valid;
+    }
+
+}
\ No newline at end of file
diff --git a/src/memtable/memtable_iterator.h b/src/memtable/memtable_iterator.h
new file mode 100644
index 0000000..4e4e65a
--- /dev/null
+++ b/src/memtable/memtable_iterator.h
@@ -0,0 +1,35 @@
+//
+// Created by qianyy on 2023/1/29.
+//
+#include "skiplist.h"
+#include "memtable.h"
+#include "table/sstable_builder.h"
+
+#ifndef SMALLKV_MEMTABLE_ITERATOR_H
+#define SMALLKV_MEMTABLE_ITERATOR_H
+namespace smallkv {
+ // Iterates over the contents of a MemTable by delegating to the iterator of
+ // the underlying skip list.
+ class MemTableIterator final {
+ private:
+ using SKIter = SkipList::SkipListIterator;
+
+ std::shared_ptr iter_;
+
+ public:
+ // Creates an iterator over `list`.
+ explicit MemTableIterator(SkipList *list);
+
+ // Moves the current node to the head of the list.
+ // Must be called before iterating.
+ void MoveToFirst();
+
+ // Advances to the next entry.
+ void Next();
+
+ // Key of the current entry.
+ const std::string &key();
+
+ // Value of the current entry.
+ const std::string &value();
+
+ // Whether the position the iterator points at is valid.
+ bool Valid();
+ };
+}
+#endif //SMALLKV_MEMTABLE_ITERATOR_H
diff --git a/src/memtable/skiplist.h b/src/memtable/skiplist.h
index c1911f8..b61270f 100644
--- a/src/memtable/skiplist.h
+++ b/src/memtable/skiplist.h
@@ -53,6 +53,34 @@ namespace smallkv {
inline int GetSize() { return size; }
+        // Current value of the mem_usage counter.
+        inline int64_t GetMemUsage() {
+            return mem_usage;
+        }
+
+ // Iterates over the skip list; mainly used by the MemTable iterator.
+ class SkipListIterator {
+ public:
+ // Creates an iterator over `list`.
+ explicit SkipListIterator(const SkipList *list);
+
+ // Returns true if the position the iterator points at is valid.
+ bool Valid();
+
+ // Key of the current node.
+ const Key &key();
+
+ // Value of the current node.
+ const Value &value();
+
+ // Advances to the next node.
+ void Next();
+
+ // todo: Prev is not supported for now; it requires changes to the
+ // underlying skip list API.
+ void Prev() = delete;
+
+ // Moves the current node to the head of the list.
+ // Must be called before iterating.
+ void MoveToFirst();
+
+ private:
+ const SkipList *list_;
+ Node *node; // node the iterator currently points at
+ };
+
private:
int RandomLevel();
@@ -68,12 +96,46 @@ namespace smallkv {
std::shared_ptr alloc;
- int max_level; // 当前表的最大高度节点
- int64_t size = 0; //表中数据量
+ int max_level; // 当前表的最大高度节点
+ int64_t size = 0; // 表中数据量(kv键值对数量)
+ int64_t mem_usage = 0; // kv键值对所占用的内存大小,单位:Byte
std::shared_ptr logger = log::get_logger();
};
+ // Points the iterator at the node that follows head_ on level 0.
+ template
+ void SkipList::SkipListIterator::MoveToFirst() {
+ node = list_->head_->next[0];
+ }
+
+ // Advances to the next node. Asserts that the current position is valid.
+ template
+ void SkipList::SkipListIterator::Next() {
+ assert(Valid());
+ node = node->next[0]; // iteration always walks the bottom level, hence index 0
+ }
+
+ // Key of the current node. Asserts that the current position is valid.
+ template
+ const Key &SkipList::SkipListIterator::key() {
+ assert(Valid());
+ return node->key;
+ }
+
+ // Value of the current node. Asserts that the current position is valid.
+ template
+ const Value &SkipList::SkipListIterator::value() {
+ assert(Valid());
+ return node->value;
+ }
+
+ // The position is valid while the current node is not null.
+ template
+ bool SkipList::SkipListIterator::Valid() {
+ return node != nullptr;
+ }
+
+ // Stores `list`; the iterator starts with a null node, so it is not valid
+ // until MoveToFirst() is called.
+ template
+ SkipList::SkipListIterator::SkipListIterator(const SkipList *list) : list_(list) {
+ node = nullptr;
+ }
+
template
std::optional SkipList::Get(const Key &key) {
int level = GetCurrentHeight() - 1;
@@ -115,7 +177,7 @@ namespace smallkv {
// todo: 这里可以优化为 std::vector prev(GetCurrentHeight, nullptr);
// 可以减少一定的计算量,后期优化性能时考虑
std::vector prev(SkipListConfig::kMaxHeight, nullptr);
-// FindPrevNode(key, prev);
+
int level = GetCurrentHeight() - 1;
auto cur = head_;
int level_of_target_node = -1;// 目标节点的层数
@@ -126,7 +188,6 @@ namespace smallkv {
logger->error("A error point.");
break; // 遍历完成. 实际上这个分支不可能到达
} else {
-// prev[level] = cur;
--level;
}
} else {
@@ -148,9 +209,11 @@ namespace smallkv {
}
}
}
-// assert(level_of_target_node > 0);
-// assert(level_of_target_node <= prev.size());
-// logger->info("level_of_target_node={}", level_of_target_node);
+
+ // 更新内存占用
+ mem_usage -= key.size();
+ mem_usage -= prev[0]->next[0]->value.size(); // prev[0]->next[0]指向待删除的节点
+
for (int i = 0; i < level_of_target_node; ++i) {
if (prev[i] != nullptr) {
assert(prev[i]->next[i] != nullptr);
@@ -196,6 +259,11 @@ namespace smallkv {
++size; // 更新size
+ // todo:这种写法导致了Key、Value必须为string、string_view类型,
+ // 模板名存实亡,后续需要改进。
+ mem_usage += key.size();
+ mem_usage += value.size();
+
// todo: 这里可以优化为 std::vector prev(GetCurrentHeight, nullptr);
// 可以减少一定的计算量,后期优化性能时考虑
std::vector prev(SkipListConfig::kMaxHeight, nullptr);
diff --git a/src/table/sstable_builder.cpp b/src/table/sstable_builder.cpp
index a9d970d..86d057c 100644
--- a/src/table/sstable_builder.cpp
+++ b/src/table/sstable_builder.cpp
@@ -35,7 +35,6 @@ namespace smallkv {
//写入dataBlock
dataBlockBuilder->add(key, val);
++key_count;
- pre_key = key;
// 如果DataBlockBuilder大小超过限制,则应该把DataBlockBuilder落盘,然后清空DataBlockBuilder
if (dataBlockBuilder->size() > SSTConfigInfo::MAX_DATA_BLOCK_SIZE) {
// 当add_restart_points函数被调用完成的时候,表明当前DataBlock
@@ -62,6 +61,7 @@ namespace smallkv {
// 持久化完成后,清空当前dataBlockBuilder
dataBlockBuilder->clear();
}
+ pre_key = key;
return Status::Success;
}
diff --git a/src/utils/codec.h b/src/utils/codec.h
index 684decc..241a7c8 100644
--- a/src/utils/codec.h
+++ b/src/utils/codec.h
@@ -38,5 +38,9 @@ namespace smallkv::utils {
dst.append(buf, sizeof(val));
}
+    // Builds a file name of the form "level_n_sst_i.sst", where n is the level
+    // number and i is the index of the sst file within that level.
+    inline std::string BuildSSTPath(uint32_t n, uint32_t i) {
+        std::string file_name = "level_";
+        file_name += std::to_string(n);
+        file_name += "_sst_";
+        file_name += std::to_string(i);
+        file_name += ".sst";
+        return file_name;
+    }
}
#endif //SMALLKV_CODEC_H
diff --git a/tests/test_db.cpp b/tests/test_db.cpp
new file mode 100644
index 0000000..ab21e91
--- /dev/null
+++ b/tests/test_db.cpp
@@ -0,0 +1,43 @@
+//
+// Created by qianyy on 2023/1/29.
+//
+#include
+#include
+#include
+#include "db/options.h"
+#include "db/db.h"
+#include "db/db_impl.h"
+
+namespace smallkv::unittest {
+ // Writes N key/value pairs through the database and checks that each one can
+ // be read back with Status::Success and the expected value.
+ TEST(DB, Put_Get) {
+ auto logger = log::get_logger();
+ auto test_options = MakeOptionsForDebugging();
+ auto db_holder = std::make_unique(test_options);
+ WriteOptions wOp;
+ ReadOptions rOp;
+ // Generate the test data.
+ const int N = 1000;
+ std::vector data_key, data_val;
+ for (int i = 0; i < N; ++i) {
+ data_key.push_back("key_" + std::to_string(i));
+ data_val.push_back("val_" + std::to_string(i));
+ }
+ // Both vectors use a 4-character prefix followed by the same number, so
+ // sorting them independently keeps key i paired with value i.
+ std::sort(data_key.begin(), data_key.end());
+ std::sort(data_val.begin(), data_val.end());
+
+ // Insert the data.
+ for (int i = 0; i < N; ++i) {
+ db_holder->Put(wOp, data_key[i], data_val[i]);
+ }
+
+ // Check the data.
+ // NOTE(review): `value` is allocated with new and never deleted.
+ std::string *value = new std::string();
+ for (int i = 0; i < N; ++i) {
+ EXPECT_EQ(db_holder->Get(rOp, data_key[i], value), Status::Success);
+ EXPECT_EQ(*value, data_val[i]);
+ value->clear();
+ }
+
+ db_holder->Close();
+ }
+}
\ No newline at end of file
diff --git a/tests/test_memtable.cpp b/tests/test_memtable.cpp
index 6766d20..7fd792a 100644
--- a/tests/test_memtable.cpp
+++ b/tests/test_memtable.cpp
@@ -7,6 +7,7 @@
#include
#include
#include "memtable/memtable.h"
+#include "memtable/memtable_iterator.h"
#include "memory/allocate.h"
namespace smallkv::unittest {
@@ -77,4 +78,34 @@ namespace smallkv::unittest {
}
}
}
+
+    // Inserts N sorted key/value pairs into a MemTable and checks that a
+    // MemTableIterator yields them back in the same order.
+    TEST(MemTable, MemTableIterator) {
+        auto alloc = std::make_shared<FreeListAllocate>();
+        auto mem_table = std::make_shared<MemTable>(alloc);
+
+        const int N = 1000;
+        // Build the data to insert.
+        std::vector<std::string> data_key, data_value;
+        data_key.reserve(N);
+        data_value.reserve(N);
+        for (int i = 0; i < N; ++i) {
+            data_key.emplace_back("key_" + std::to_string(i));
+            data_value.emplace_back("value_" + std::to_string(i));
+        }
+
+        // Each vector uses a fixed-length prefix followed by the same number,
+        // so sorting them independently keeps key i paired with value i.
+        std::sort(data_key.begin(), data_key.end());
+        std::sort(data_value.begin(), data_value.end());
+
+        // Insert.
+        for (int i = 0; i < N; ++i) {
+            mem_table->Add(data_key[i], data_value[i]);
+        }
+
+        // NewIter() returns a raw pointer created with new; hold it in a
+        // unique_ptr so the iterator is released (it used to leak).
+        const std::unique_ptr<MemTableIterator> iter(mem_table->NewIter());
+        iter->MoveToFirst();
+        // Exercise the iterator.
+        for (int i = 0; i < N; ++i) {
+            // Fail the test cleanly instead of tripping the iterator's own
+            // assert when the list ends early.
+            ASSERT_TRUE(iter->Valid());
+            EXPECT_EQ(iter->key(), data_key[i]);
+            EXPECT_EQ(iter->value(), data_value[i]);
+            iter->Next();
+        }
+    }
}
diff --git a/tests/test_skiplist.cpp b/tests/test_skiplist.cpp
index 2da76a5..d777055 100644
--- a/tests/test_skiplist.cpp
+++ b/tests/test_skiplist.cpp
@@ -145,4 +145,38 @@ namespace smallkv::unittest {
}
}
}
+
+
+ // Checks that GetSize() and GetMemUsage() follow inserts and deletes.
+ // Every pair used here has a 1-byte key and a 7-byte value, so each insert
+ // is expected to add 8 to the memory usage and each delete to remove 8.
+ TEST(skiplist, GetMemUsage_and_GetSize) {
+ auto alloc = std::make_shared();
+ std::shared_ptr> skiplist =
+ std::make_shared>(alloc);
+
+ // An empty list reports zero for both counters.
+ EXPECT_EQ(skiplist->GetSize(), 0);
+ EXPECT_EQ(skiplist->GetMemUsage(), 0);
+
+ skiplist->Insert("1", "value_1");
+ EXPECT_EQ(skiplist->GetSize(), 1);
+ EXPECT_EQ(skiplist->GetMemUsage(), 8);
+
+ skiplist->Insert("3", "value_3");
+ EXPECT_EQ(skiplist->GetSize(), 2);
+ EXPECT_EQ(skiplist->GetMemUsage(), 16);
+
+ skiplist->Insert("5", "value_5");
+ EXPECT_EQ(skiplist->GetSize(), 3);
+ EXPECT_EQ(skiplist->GetMemUsage(), 24);
+
+ // Deleting in insertion order brings both counters back down to zero.
+ skiplist->Delete("1");
+ EXPECT_EQ(skiplist->GetSize(), 2);
+ EXPECT_EQ(skiplist->GetMemUsage(), 16);
+
+ skiplist->Delete("3");
+ EXPECT_EQ(skiplist->GetSize(), 1);
+ EXPECT_EQ(skiplist->GetMemUsage(), 8);
+
+ skiplist->Delete("5");
+ EXPECT_EQ(skiplist->GetSize(), 0);
+ EXPECT_EQ(skiplist->GetMemUsage(), 0);
+ }
}