// swap_space.hpp
// A scheme for transparently swapping data structures in and out of
// memory.
// WARNING: this is very incomplete. It's just enough functionality
// for the betree.cpp. In particular, the current system
// does not handle cycles in the pointer graph or pointers
// into the middle of objects (such as into an array).
// The goal of this code is to enable users to write complex in-memory
// data structures and have a separate layer (i.e. this code) manage
// I/O. Users should be able to define their data structures as they
// see fit (i.e. they can use pointers, etc) but still control the
// granularity at which items are swapped to/from memory.
// Therefore, we define a swap_space::pointer type that represents a
// pointer from one swappable unit to another. When the swapper elects
// to swap out an object X, it will swap out all the objects that X
// points to through regular C++ pointers. All these objects will be
// written to a single place on the backing store, so this will be
// I/O-efficient. The swapper does not traverse swap_space::pointers
// -- they point to separate things that should be swapped out
// independently of the thing pointing to them.
// The betree code provides an example of how this is used. We want
// each node to be swapped in/out as a single unit, but separate nodes
// in the tree should be able to be swapped in/out independently of
// each other. Therefore, nodes use swap_space::pointers to point to
// each other. They use regular C++ pointers to point to internal
// items that should be serialized as part of the node.
// The swap_space needs to manage all pointers to swappable objects.
// New swappable objects should be created like this:
// swap_space ss;
// swap_space::pointer<T> p = ss.allocate(new T(constructor args));
// You can then use operator-> as normal, e.g.
// p->some_field
// p->some_method(args)
// Although operator* is not defined, it should be straightforward
// to add.
// Invoking p->some_method() pins the object referred to by p in
// memory. Thus, during the execution of some_method(), it is safe to
// dereference "this" and any other plain C++ pointers in the object.
// Objects are automatically garbage collected. The garbage collector
// uses reference counting.
// The current system uses LRU to select items to swap. The swap
// space has a user-specified in-memory cache size. The cache size
// can be adjusted dynamically.
// Don't try to get your hands on an unwrapped pointer to the object
// or anything that is swapped in/out as part of the object. It can
// only lead to trouble. Casting is also probably a bad idea. Just
// write nice, clean, type-safe, well-encapsulated code and everything
// should work just fine.
// Objects managed by this system must be sub-types of class
// serializable. This basically defines two methods for serializing
// and deserializing the object. See the betree for examples of
// implementing these methods. We provide default implementations for
// a few basic types and STL containers. Feel free to add more and
// submit patches as you need them.
// The current implementation serializes to a textual file format.
// This is just a convenience. It would be nice to be able to swap in
// different formats.
#ifndef SWAP_SPACE_HPP
#define SWAP_SPACE_HPP
#include <cstdint>
#include <unordered_map>
#include <map>
#include <set>
#include <functional>
#include <sstream>
#include <cassert>
#include "backing_store.hpp"
#include "debug.hpp"
class swap_space;
// State that is threaded through every serialize()/deserialize() call
// made while one object is being written out or read back in.
class serialization_context {
public:
  // The swap space on whose behalf the (de)serialization is running.
  swap_space &ss;

  // Starts out true.  swap_space::pointer::_serialize() clears it, so
  // after serializing an object it tells whether that object contained
  // any swap_space pointers.
  bool is_leaf;

  // Binds the context to sspace and marks it as a leaf until proven
  // otherwise.
  serialization_context(swap_space &sspace)
    : ss(sspace), is_leaf(true)
  {
  }
};
// Interface that every object managed by a swap_space must implement.
// The free serialize()/deserialize() templates forward to these two
// methods for any type without a more specific overload.
class serializable {
public:
  virtual ~serializable() = default;

  // Write this object's state to fs.
  virtual void _serialize(std::iostream &fs,
                          serialization_context &context) = 0;

  // Restore this object's state from fs.
  virtual void _deserialize(std::iostream &fs,
                            serialization_context &context) = 0;
};
// Serialization primitives for the basic value types.  Only the
// declarations appear in this header; the definitions are not visible
// here (presumably they live in the accompanying .cpp -- confirm).
// Each serialize() writes x to fs and each deserialize() reads a value
// from fs into x; the container and pointer templates below are built
// on top of these overloads.
void serialize(std::iostream &fs, serialization_context &context, uint64_t x);
void deserialize(std::iostream &fs, serialization_context &context, uint64_t &x);
void serialize(std::iostream &fs, serialization_context &context, int64_t x);
void deserialize(std::iostream &fs, serialization_context &context, int64_t &x);
void serialize(std::iostream &fs, serialization_context &context, std::string x);
void deserialize(std::iostream &fs, serialization_context &context, std::string &x);
// Writes mp to fs as text:
//
//   map <number of entries> {
//    <key> -> <value>
//    ...
//   }
//
// Keys and values are written with whichever serialize() overload
// matches their type.
template<class Key, class Value> void serialize(std::iostream &fs,
                                                serialization_context &context,
                                                std::map<Key, Value> &mp)
{
  // Header line: tag, entry count, opening brace.
  fs << "map " << mp.size() << " {" << std::endl;
  assert(fs.good());

  // One line per entry, in the map's iteration order.
  for (auto &entry : mp) {
    fs << " ";
    serialize(fs, context, entry.first);
    fs << " -> ";
    serialize(fs, context, entry.second);
    fs << std::endl;
  }

  // Closing brace.
  fs << "}" << std::endl;
}
// Reads back a map written by the matching serialize() overload and
// stores every entry into mp.  Existing entries of mp are kept unless
// a key read from the stream overwrites them.  The literal tokens of
// the format ("map", "{", "->", "}") are consumed but not checked.
template<class Key, class Value> void deserialize(std::iostream &fs,
                                                  serialization_context &context,
                                                  std::map<Key, Value> &mp)
{
  std::string token;  // scratch space for the literal tokens
  int count = 0;

  fs >> token;        // expected: "map"
  fs >> count;        // number of entries that follow
  fs >> token;        // expected: "{"
  assert(fs.good());

  for (int left = count; left > 0; --left) {
    Key key;
    Value value;
    deserialize(fs, context, key);
    fs >> token;      // expected: "->"
    deserialize(fs, context, value);
    mp[key] = value;
  }

  fs >> token;        // expected: "}"
}
// Serializes the object that a plain C++ pointer refers to: writes the
// tag "pointer " and then the pointee itself.  x is dereferenced
// unconditionally, so it must not be null.
template<class X>
void serialize(std::iostream &fs, serialization_context &context, X *&x)
{
  fs << "pointer ";
  X &pointee = *x;
  serialize(fs, context, pointee);
}
// Counterpart of serialize(X *&): allocates a fresh X with new, stores
// it in x (the previous value of x is overwritten, not freed), checks
// the "pointer" tag and then fills in the new object from fs.
template<class X>
void deserialize(std::iostream &fs, serialization_context &context, X *&x)
{
  std::string tag;
  X *fresh = new X;
  x = fresh;

  fs >> tag;
  assert (tag == "pointer");

  deserialize(fs, context, *fresh);
}
// Fallback for any type without a more specific overload: delegate to
// the object's own _serialize() method.
template<typename X>
void serialize(std::iostream &fs,
               serialization_context &context,
               X &x)
{
  x._serialize(fs, context);
}
// Fallback for any type without a more specific overload: delegate to
// the object's own _deserialize() method.
template<typename X>
void deserialize(std::iostream &fs,
                 serialization_context &context,
                 X &x)
{
  x._deserialize(fs, context);
}
// Owns a set of reference-counted, swappable objects.  Each object is
// identified by a numeric id and described by an `object` record; its
// in-memory representation (`object::target`) may be absent, in which
// case load() re-creates it from the backing store.  Clients never
// hold the objects directly: they hold swap_space::pointer<T> handles
// and reach the object through short-lived swap_space::pin<T> guards.
class swap_space {
public:
  swap_space(backing_store *bs, uint64_t n);

  template<class Referent> class pointer;

  // Registers tgt with this swap space and returns the first pointer
  // to it.  The swap space takes ownership of tgt: it is deleted when
  // the last pointer to it goes away (see pointer::depoint).
  template<class Referent>
  pointer<Referent> allocate(Referent * tgt) {
    return pointer<Referent>(this, tgt);
  }

  // This pins an object in memory for the duration of a member
  // access.  It's sort of an instance of the "resource acquisition is
  // initialization" paradigm.
  //
  // A pin with target == 0 is a null pin: it holds nothing and its
  // destruction is a no-op.
  template<class Referent>
  class pin {
  public:
    // Marks the object as accessed (read-only), loading it if
    // necessary, and returns it.
    const Referent * operator->(void) const {
      assert(ss->objects.count(target) > 0);
      debug(std::cout << "Accessing (constly) " << target
            << " (" << ss->objects[target]->target << ")" << std::endl);
      access(target, false);
      return (const Referent *)ss->objects[target]->target;
    }

    // Marks the object as accessed and dirty, loading it if
    // necessary, and returns it.
    Referent * operator->(void) {
      assert(ss->objects.count(target) > 0);
      debug(std::cout << "Accessing " << target
            << " (" << ss->objects[target]->target << ")" << std::endl);
      access(target, true);
      return (Referent *)ss->objects[target]->target;
    }

    // Pins the object that p refers to (nothing if p is null).
    pin(const pointer<Referent> *p)
      : ss(nullptr),
        target(0)
    {
      dopin(p->ss, p->target);
    }

    pin(void)
      : ss(nullptr),
        target(0)
    {}

    // Copying a pin takes an additional pin on the same object, so
    // that every pin's destructor releases exactly what it acquired.
    pin(const pin &other)
      : ss(nullptr),
        target(0)
    {
      dopin(other.ss, other.target);
    }

    ~pin(void) {
      unpin();
    }

    pin &operator=(const pin &other) {
      if (&other != this) {
        unpin();
        dopin(other.ss, other.target);
      }
      return *this;
    }

  private:
    // Releases the pin, if any, gives the swap space a chance to
    // evict, and resets this pin to the null state.
    void unpin(void) {
      if (target > 0) {
        assert(ss->objects.count(target) > 0);
        // Only touch ss inside this branch: a null pin has ss == nullptr.
        debug(std::cout << "Unpinning " << target
              << " (" << ss->objects[target]->target << ")" << std::endl);
        ss->objects[target]->pincount--;
        ss->maybe_evict_something();
      }
      ss = nullptr;
      target = 0;
    }

    // Takes a pin on newtarget in newss.  Must only be called on a
    // null pin.
    void dopin(swap_space *newss, uint64_t newtarget) {
      assert(ss == nullptr && target == 0);
      ss = newss;
      target = newtarget;
      if (target > 0) {
        assert(ss->objects.count(target) > 0);
        debug(std::cout << "Pinning " << target
              << " (" << ss->objects[target]->target << ")" << std::endl);
        ss->objects[target]->pincount++;
      }
    }

    // Records an access to tgt: refreshes its position in the LRU
    // queue, ORs in the dirty flag, makes sure it is loaded and then
    // lets the swap space evict if needed.
    void access(uint64_t tgt, bool dirty) const {
      assert(ss->objects.count(tgt) > 0);
      object *obj = ss->objects[tgt];
      // The queue is ordered via cmp_by_last_access, so the record has
      // to be taken out of the set before last_access is modified.
      ss->lru_pqueue.erase(obj);
      obj->last_access = ss->next_access_time++;
      ss->lru_pqueue.insert(obj);
      obj->target_is_dirty |= dirty;
      ss->load<Referent>(tgt);
      ss->maybe_evict_something();
    }

    swap_space *ss;
    uint64_t target;
  };

  // Reference-counted handle to an object managed by a swap_space.
  // target == 0 denotes a null pointer.  Pointers are themselves
  // serializable, which is how one swappable object refers to another.
  template<class Referent>
  class pointer : public serializable {
    friend class swap_space;
    friend class pin<Referent>;

  public:
    pointer(void) :
      ss(nullptr),
      target(0)
    {}

    // Shares the referent of other and bumps its reference count.
    pointer(const pointer &other) {
      ss = other.ss;
      target = other.target;
      if (target > 0) {
        assert(ss->objects.count(target) > 0);
        ss->objects[target]->refcount++;
      }
    }

    ~pointer(void) {
      depoint();
    }

    // Drops this pointer's reference and makes it null.  When the last
    // reference goes away the object is destroyed: its in-memory
    // representation is deleted, its backing-store allocation is
    // released and its record is removed from the swap space.
    void depoint(void) {
      if (target == 0)
        return;
      assert(ss->objects.count(target) > 0);

      object *obj = ss->objects[target];
      assert(obj->refcount > 0);
      if ((--obj->refcount) == 0) {
        debug(std::cout << "Erasing " << target << std::endl);

        // Load it into memory so we can recursively free stuff
        if (obj->target == nullptr) {
          assert(obj->bsid > 0);
          if (!obj->is_leaf) {
            ss->load<Referent>(target);
          } else {
            debug(std::cout << "Skipping load of leaf " << target << std::endl);
          }
        }

        ss->objects.erase(target);
        ss->lru_pqueue.erase(obj);
        // Only an object that is actually in memory counts towards
        // current_in_memory_objects (load() increments it only when it
        // brings an object in), so a leaf whose load was skipped above
        // must not be subtracted.
        if (obj->target) {
          delete obj->target;
          ss->current_in_memory_objects--;
        }
        if (obj->bsid > 0)
          ss->backstore->deallocate(obj->bsid);
        delete obj;
      }
      target = 0;
    }

    pointer & operator=(const pointer &other) {
      if (&other != this) {
        // Take the new reference before dropping the old one, so that
        // releasing the old referent cannot invalidate what we are
        // about to point at.
        swap_space *newss = other.ss;
        uint64_t newtarget = other.target;
        if (newtarget > 0) {
          assert(newss->objects.count(newtarget) > 0);
          newss->objects[newtarget]->refcount++;
        }
        depoint();
        ss = newss;
        target = newtarget;
      }
      return *this;
    }

    bool operator==(const pointer &other) const {
      return ss == other.ss && target == other.target;
    }

    bool operator!=(const pointer &other) const {
      return !operator==(other);
    }

    // const Referent * operator->(void) const {
    //   ss->access(target, false);
    //   return ss->objects[target].target;
    // }

    // Member access goes through a temporary pin, which keeps the
    // object pinned for as long as the temporary lives.
    const pin<Referent> operator->(void) const {
      return pin<Referent>(this);
    }

    pin<Referent> operator->(void) {
      return pin<Referent>(this);
    }

    // Returns a pin that the caller can keep for several accesses.
    pin<Referent> get_pin(void) {
      return pin<Referent>(this);
    }

    const pin<Referent> get_pin(void) const {
      return pin<Referent>(this);
    }

    // True if this pointer is non-null and its referent currently has
    // an in-memory representation.
    bool is_in_memory(void) const {
      if (target == 0)
        return false;  // null pointer: ss may be null as well
      assert(ss->objects.count(target) > 0);
      return ss->objects[target]->target != nullptr;
    }

    // True if this pointer is non-null and its referent is in memory
    // and flagged dirty.
    bool is_dirty(void) const {
      if (target == 0)
        return false;  // null pointer: ss may be null as well
      assert(ss->objects.count(target) > 0);
      object *obj = ss->objects[target];
      return obj->target != nullptr && obj->target_is_dirty;
    }

    // Writes the target id and hands this pointer's reference over to
    // the serialized form: target is reset to 0 without touching the
    // reference count.  Also records in the context that the enclosing
    // object is not a leaf.
    void _serialize(std::iostream &fs, serialization_context &context) override {
      assert(target > 0);
      assert(context.ss.objects.count(target) > 0);
      fs << target << " ";
      target = 0;
      assert(fs.good());
      context.is_leaf = false;
    }

    // Reads a target id back into a null pointer.
    void _deserialize(std::iostream &fs, serialization_context &context) override {
      assert(target == 0);
      ss = &context.ss;
      fs >> target;
      assert(fs.good());
      assert(context.ss.objects.count(target) > 0);
      // We just created a new reference to this object and
      // invalidated the on-disk reference, so the total refcount
      // stays the same.
    }

  private:
    swap_space *ss;
    uint64_t target;

    // Only callable through swap_space::allocate(...)
    pointer(swap_space *sspace, Referent *tgt)
    {
      ss = sspace;
      // NOTE(review): this value is overwritten by o->id just below;
      // the statement is kept because it advances next_id and the
      // object constructor is not visible from this header.
      target = sspace->next_id++;

      object *o = new object(sspace, tgt);
      assert(o != nullptr);
      target = o->id;
      assert(ss->objects.count(target) == 0);
      ss->objects[target] = o;
      ss->lru_pqueue.insert(o);
      ss->current_in_memory_objects++;
      ss->maybe_evict_something();
    }
  };

private:
  backing_store *backstore;

  uint64_t next_id = 1;
  uint64_t next_access_time = 0;

  // Bookkeeping record for one managed object.
  class object {
  public:
    object(swap_space *sspace, serializable * tgt);

    serializable * target;   // in-memory representation, or NULL
    uint64_t id;             // key of this record in swap_space::objects
    uint64_t bsid;           // backing-store id; values > 0 mean one is allocated
    bool is_leaf;            // if true, depoint() frees it without loading it
    uint64_t refcount;       // number of live references
    uint64_t last_access;    // value of next_access_time at the last access
    bool target_is_dirty;    // set by non-const accesses
    uint64_t pincount;       // number of live pins
  };

  static bool cmp_by_last_access(object *a, object *b);

  // Makes sure object tgt has an in-memory representation, reading it
  // back from the backing store as a Referent if necessary.
  template<class Referent>
  void load(uint64_t tgt) {
    assert(objects.count(tgt) > 0);
    object *obj = objects[tgt];
    if (obj->target == nullptr) {
      debug(std::cout << "Loading " << obj->id << std::endl);
      std::iostream *in = backstore->get(obj->bsid);
      Referent *r = new Referent();
      serialization_context ctxt(*this);
      deserialize(*in, ctxt, *r);
      backstore->put(in);
      obj->target = r;
      current_in_memory_objects++;
    }
  }

  void set_cache_size(uint64_t sz);

  void write_back(object *obj);
  void maybe_evict_something(void);

  uint64_t max_in_memory_objects;
  uint64_t current_in_memory_objects = 0;
  std::unordered_map<uint64_t, object *> objects;
  std::set<object *, bool (*)(object *, object *)> lru_pqueue;
};
#endif // SWAP_SPACE_HPP