From 55261f3172a18b10caa04db6de7b5f4680b9535f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 27 Jul 2020 12:47:26 +0300 Subject: [PATCH] loadAt loadAt is new optional storage interface that is intended to replace loadBefore with more clean and uniform semantic. Compared to loadBefore, loadAt: 1) returns data=None and serial of the removal, when loaded object was found to be deleted. loadBefore is returning only data=None in such case. This loadAt property allows to fix DemoStorage data corruption when whiteouts in overlay part were not previously correctly taken into account. https://github.com/zopefoundation/ZODB/issues/318 2) for regular data records, does not require storages to return next_serial, in addition to (data, serial). loadBefore requirement to return both serial and next_serial is constraining storages unnecessarily, and, while for FileStorage it is free to implement, for other storages it is not - for example for NEO and RelStorage, finding out next_serial, after looking up oid@at data record, costs one more SQL query: https://lab.nexedi.com/nexedi/neoppod/blob/fb746e6b/neo/storage/database/mysqldb.py#L484-508 https://lab.nexedi.com/nexedi/neoppod/blob/fb746e6b/neo/storage/database/mysqldb.py#L477-482 https://github.com/zodb/relstorage/blob/3.1.1-1-ge7628f9/src/relstorage/storage/load.py#L259-L264 https://github.com/zodb/relstorage/blob/3.1.1-1-ge7628f9/src/relstorage/adapters/mover.py#L177-L199 next_serial is not only about execution overhead - it is semantically redundant to be there and can be removed from load return. The reason I say that next_serial can be removed is that in ZODB/py the only place, that I could find, where next_serial is used on client side is in client cache (e.g. in NEO client cache), and that cache can be remade to work without using that next_serial at all. In simple words whenever after loadAt(oid, at) -> (data, serial) query, the cache can remember data for oid in [serial, at] range. Next, when invalidation message from server is received, cache entries, that had at == client_head, are extended (at -> new_head) for oids that are not present in invalidation message, while for oids that are present in invalidation message no such extension is done. This allows to maintain cache in correct state, invalidate it when there is a need to invalidate, and not to throw away cache entries that should remain live. This of course requires ZODB server to include both modified and just-created objects into invalidation messages ( https://github.com/zopefoundation/ZEO/pull/160 , https://github.com/zopefoundation/ZODB/pull/319 ). Switching to loadAt should thus allow storages like NEO and, maybe, RelStorage, to do 2x less SQL queries on every object access. https://github.com/zopefoundation/ZODB/issues/318#issuecomment-657685745 In other words loadAt unifies return signature to always be (data, serial) instead of POSKeyError object does not exist at all None object was removed (data, serial, next_serial) regular data record used by loadBefore. This patch: - introduces new interface. - introduces ZODB.utils.loadAt helper, that uses either storage.loadAt, or, if the storage does not implement loadAt interface, tries to mimic loadAt semantic via storage.loadBefore to possible extent + emits corresponding warning. - converts MVCCAdapter to use loadAt instead of loadBefore. - changes DemoStorage to use loadAt, and this way fixes above-mentioned data corruption issue; adds corresponding test; converts DemoStorage.loadBefore to be a wrapper around DemoStorage.loadAt. - adds loadAt implementation to FileStorage and MappingStorage. - adapts other tests/code correspondingly. /cc @jimfulton, @jamadden, @vpelletier, @jmuchemb, @arnaud-fontaine, @gidzit, @klawlf82, @hannosch --- src/ZODB/BaseStorage.py | 7 +- src/ZODB/DB.py | 3 +- src/ZODB/DemoStorage.py | 85 +++++++++++++----------- src/ZODB/FileStorage/FileStorage.py | 36 ++++++++++ src/ZODB/MappingStorage.py | 19 ++++++ src/ZODB/blob.py | 7 +- src/ZODB/historical_connections.rst | 9 ++- src/ZODB/interfaces.py | 21 ++++++ src/ZODB/mvccadapter.py | 32 ++++++--- src/ZODB/tests/IExternalGC.test | 18 +++-- src/ZODB/tests/MVCCMappingStorage.py | 1 + src/ZODB/tests/hexstorage.py | 9 ++- src/ZODB/tests/testConnection.py | 9 +++ src/ZODB/tests/testDemoStorage.py | 99 ++++++++++++++++++++++++++++ src/ZODB/tests/test_storage.py | 19 +++++- src/ZODB/tests/testmvcc.py | 6 +- src/ZODB/utils.py | 52 +++++++++++++-- 17 files changed, 353 insertions(+), 79 deletions(-) diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index ec2864b47..0f5140f5b 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -59,7 +59,7 @@ class BaseStorage(UndoLogCompatible): If it stores multiple revisions, it should implement loadSerial() - loadBefore() + loadAt() Each storage will have two locks that are accessed via lock acquire and release methods bound to the instance. (Yuck.) @@ -267,9 +267,8 @@ def loadSerial(self, oid, serial): raise POSException.Unsupported( "Retrieval of historical revisions is not supported") - def loadBefore(self, oid, tid): - """Return most recent revision of oid before tid committed.""" - return None + # do not provide loadAt/loadBefore here in BaseStorage - if child forgets + # to override it - storage will always return "no data" instead of failing. def copyTransactionsFrom(self, other, verbose=0): """Copy transactions from another storage. diff --git a/src/ZODB/DB.py b/src/ZODB/DB.py index 084b5f273..4981cc1d2 100644 --- a/src/ZODB/DB.py +++ b/src/ZODB/DB.py @@ -726,8 +726,7 @@ def open(self, transaction_manager=None, at=None, before=None): - `before`: like `at`, but opens the readonly state before the tid or datetime. """ - # `at` is normalized to `before`, since we use storage.loadBefore - # as the underlying implementation of both. + # `at` is normalized to `before`. before = getTID(at, before) if (before is not None and before > self.lastTransaction() and diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index 439f07850..625fd570c 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -24,6 +24,7 @@ import random import weakref import tempfile +import warnings import ZODB.BaseStorage import ZODB.blob import ZODB.interfaces @@ -217,45 +218,55 @@ def __len__(self): # still want load for old clients (e.g. zeo servers) load = load_current - def loadBefore(self, oid, tid): - try: - result = self.changes.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the changes, so defer to base - return self.base.loadBefore(oid, tid) + def loadAt(self, oid, at): + data, serial = ZODB.utils.loadAt(self.changes, oid, at) + if (data is not None) or (serial != ZODB.utils.z64): + # object is present in changes either as data or deletion record. + return data, serial - if result is None: - # The oid *was* in the changes, but there aren't any - # earlier records. Maybe there are in the base. - try: - result = self.base.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the base, so None will be the right result - pass + # object is not present in changes at all - use base + return ZODB.utils.loadAt(self.base, oid, at) + + def loadBefore(self, oid, before): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) + p64 = ZODB.utils.p64 + u64 = ZODB.utils.u64 + + if before in (maxtid, ZODB.utils.z64): + at = before + else: + at = p64(u64(before)-1) + + data, serial = self.loadAt(oid, at) + + # find out next_serial. + # it is ok to use dumb/slow implementation since loadBefore should not + # be used and is provided only for backward compatibility. + next_serial = maxtid + while 1: + _, s = self.loadAt(oid, p64(u64(next_serial)-1)) + assert s >= serial + if s == serial: + # found - next_serial is serial of the next data record + break + next_serial = s + + if next_serial == maxtid: + next_serial = None + + # next_serial found -> return/raise what loadBefore users expect + if data is None: + if next_serial is None: + # object was never created + raise ZODB.POSException.POSKeyError(oid) else: - if result and not result[-1]: - # The oid is current in the base. We need to find - # the end tid in the base by fining the first tid - # in the changes. Unfortunately, there isn't an - # api for this, so we have to walk back using - # loadBefore. - - if tid == maxtid: - # Special case: we were looking for the - # current value. We won't find anything in - # changes, so we're done. - return result - - end_tid = maxtid - t = self.changes.loadBefore(oid, end_tid) - while t: - end_tid = t[1] - t = self.changes.loadBefore(oid, end_tid) - result = result[:2] + ( - end_tid if end_tid != maxtid else None, - ) - - return result + # object was deleted + return None + + # regular data record + return data, serial, next_serial + def loadBlob(self, oid, serial): try: diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 7b84d92a2..4f6d1fd44 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -21,6 +21,7 @@ import logging import os import time +import warnings from struct import pack from struct import unpack @@ -57,6 +58,7 @@ from ZODB.interfaces import IStorageIteration from ZODB.interfaces import IStorageRestoreable from ZODB.interfaces import IStorageUndoable +from ZODB.interfaces import IStorageLoadAt from ZODB.POSException import ConflictError from ZODB.POSException import MultipleUndoErrors from ZODB.POSException import POSKeyError @@ -133,6 +135,7 @@ def __init__(self, afile): IStorageCurrentRecordIteration, IExternalGC, IStorage, + IStorageLoadAt, ) class FileStorage( FileStorageFormatter, @@ -559,7 +562,40 @@ def loadSerial(self, oid, serial): else: return self._loadBack_impl(oid, h.back)[0] + def loadAt(self, oid, at): + """loadAt implements IStorageLoadAt.""" + with self._files.get() as _file: + try: + pos = self._lookup_pos(oid) + except POSKeyError: + # object does not exist + return None, z64 + + while 1: + h = self._read_data_header(pos, oid, _file) + if h.tid <= at: + break + pos = h.prev + if not pos: + # object not yet created as of at + return None, z64 + + # h is the most recent DataHeader with .tid <= at + if h.plen: + # regular data record + return _file.read(h.plen), h.tid + elif h.back: + # backpointer + data, _, _, _ = self._loadBack_impl(oid, h.back, + fail=False, _file=_file) + return data, h.tid + else: + # deletion + return None, h.tid + def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) with self._files.get() as _file: pos = self._lookup_pos(oid) end_tid = None diff --git a/src/ZODB/MappingStorage.py b/src/ZODB/MappingStorage.py index 8d74bb81b..6d85dd9c8 100644 --- a/src/ZODB/MappingStorage.py +++ b/src/ZODB/MappingStorage.py @@ -19,6 +19,7 @@ import BTrees import time +import warnings import ZODB.BaseStorage import ZODB.interfaces import ZODB.POSException @@ -30,6 +31,7 @@ @zope.interface.implementer( ZODB.interfaces.IStorage, ZODB.interfaces.IStorageIteration, + ZODB.interfaces.IStorageLoadAt, ) class MappingStorage(object): """In-memory storage implementation @@ -148,9 +150,26 @@ def __len__(self): load = ZODB.utils.load_current + # ZODB.interfaces.IStorageLoadAt + @ZODB.utils.locked(opened) + def loadAt(self, oid, at): + z64 = ZODB.utils.z64 + tid_data = self._data.get(oid) + if not tid_data: + return None, z64 + if at == z64: + return None, z64 + tids_at = tid_data.keys(None, at) + if not tids_at: + return None, z64 + serial = tids_at[-1] + return tid_data[serial], serial + # ZODB.interfaces.IStorage @ZODB.utils.locked(opened) def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) tid_data = self._data.get(oid) if tid_data: before = ZODB.utils.u64(tid) diff --git a/src/ZODB/blob.py b/src/ZODB/blob.py index 0a1420737..cf1b90e77 100644 --- a/src/ZODB/blob.py +++ b/src/ZODB/blob.py @@ -866,10 +866,10 @@ def undo(self, serial_id, transaction): for oid in self.fshelper.getOIDsForSerial(serial_id): # we want to find the serial id of the previous revision # of this blob object. - load_result = self.loadBefore(oid, serial_id) - - if load_result is None: + at_before = utils.p64(utils.u64(serial_id)-1) + _, serial_before = utils.loadAt(self, oid, at_before) + if serial_before == utils.z64: # There was no previous revision of this blob # object. The blob was created in the transaction # represented by serial_id. We copy the blob data @@ -884,7 +884,6 @@ def undo(self, serial_id, transaction): # transaction implied by "serial_id". We copy the blob # data to a new file that references the undo transaction # in case a user wishes to undo this undo. - data, serial_before, serial_after = load_result orig_fn = self.fshelper.getBlobFilename(oid, serial_before) new_fn = self.fshelper.getBlobFilename(oid, undo_serial) with open(orig_fn, "rb") as orig: diff --git a/src/ZODB/historical_connections.rst b/src/ZODB/historical_connections.rst index 14a6d4f25..a294718b2 100644 --- a/src/ZODB/historical_connections.rst +++ b/src/ZODB/historical_connections.rst @@ -36,7 +36,7 @@ development continues on a "development" head. A database can be opened historically ``at`` or ``before`` a given transaction serial or datetime. Here's a simple example. It should work with any storage -that supports ``loadBefore``. +that supports ``loadAt`` or ``loadBefore``. We'll begin our example with a fairly standard set up. We @@ -138,10 +138,9 @@ root. >>> historical_conn.root()['first']['count'] 0 -In fact, ``at`` arguments are translated into ``before`` values because the -underlying mechanism is a storage's loadBefore method. When you look at a -connection's ``before`` attribute, it is normalized into a ``before`` serial, -no matter what you pass into ``db.open``. +In fact, ``at`` arguments are translated into ``before`` values. +When you look at a connection's ``before`` attribute, it is normalized into a +``before`` serial, no matter what you pass into ``db.open``. >>> print(conn.before) None diff --git a/src/ZODB/interfaces.py b/src/ZODB/interfaces.py index e73016d78..ceeab9415 100644 --- a/src/ZODB/interfaces.py +++ b/src/ZODB/interfaces.py @@ -710,6 +710,9 @@ def __len__(): def loadBefore(oid, tid): """Load the object data written before a transaction id + ( This method is deprecated and kept for backward-compatibility. + Please use loadAt instead. ) + If there isn't data before the object before the given transaction, then None is returned, otherwise three values are returned: @@ -886,6 +889,24 @@ def prefetch(oids, tid): more than once. """ +class IStorageLoadAt(Interface): + + def loadAt(oid, at): # -> (data, serial) + """Load object data as observed at given database state. + + loadAt returns data for object with given object ID as observed by + database state ≤ at. Two values are returned: + + - The data record, + - The transaction ID of the data record. + + If the object does not exist, or is deleted as of `at` database state, + loadAt returns data=None, and serial indicates transaction ID of the + most recent deletion done in transaction with ID ≤ at, or null tid if + there is no such deletion. + + Note: no POSKeyError is raised even if object id is not in the storage. + """ class IMultiCommitStorage(IStorage): """A multi-commit storage can commit multiple transactions at once. diff --git a/src/ZODB/mvccadapter.py b/src/ZODB/mvccadapter.py index 56e95c09f..5b3088993 100644 --- a/src/ZODB/mvccadapter.py +++ b/src/ZODB/mvccadapter.py @@ -10,7 +10,7 @@ import zope.interface from . import interfaces, serialize, POSException -from .utils import p64, u64, Lock, oid_repr, tid_repr +from .utils import p64, u64, z64, maxtid, Lock, loadAt, oid_repr, tid_repr class Base(object): @@ -99,7 +99,7 @@ class MVCCAdapterInstance(Base): 'checkCurrentSerialInTransaction', 'tpc_abort', ) - _start = None # Transaction start time + _start = None # Transaction start time (before) _ltid = b'' # Last storage transaction id def __init__(self, base): @@ -151,8 +151,20 @@ def poll_invalidations(self): def load(self, oid): assert self._start is not None - r = self._storage.loadBefore(oid, self._start) - if r is None: + at = p64(u64(self._start)-1) + data, serial = loadAt(self._storage, oid, at) + if data is None: + # raise POSKeyError if object does not exist at all + # TODO raise POSKeyError always and switch to raising ReadOnlyError only when + # actually detecting that load is being affected by simultaneous pack (see below). + if serial == z64: + # XXX second call to loadAt - it will become unneeded once we + # switch to raising POSKeyError. + _, serial_exists = loadAt(self._storage, oid, maxtid) + if serial_exists == z64: + # object does not exist at all + raise POSException.POSKeyError(oid) + # object was deleted or not-yet-created. # raise ReadConflictError - not - POSKeyError due to backward # compatibility: a pack(t+δ) could be running simultaneously to our @@ -174,11 +186,14 @@ def load(self, oid): # whether pack is/was actually running and its details, take that # into account, and raise ReadConflictError only in the presence of # database being simultaneously updated from back of its log. + # + # See https://github.com/zopefoundation/ZODB/pull/322 for + # preliminary steps in this direction. raise POSException.ReadConflictError( "load %s @%s: object deleted, likely by simultaneous pack" % (oid_repr(oid), tid_repr(p64(u64(self._start) - 1)))) - return r[:2] + return data, serial def prefetch(self, oids): try: @@ -257,10 +272,11 @@ def poll_invalidations(self): new_oid = pack = store = read_only_writer def load(self, oid, version=''): - r = self._storage.loadBefore(oid, self._before) - if r is None: + at = p64(u64(self._before)-1) + data, serial = loadAt(self._storage, oid, at) + if data is None: raise POSException.POSKeyError(oid) - return r[:2] + return data, serial class UndoAdapterInstance(Base): diff --git a/src/ZODB/tests/IExternalGC.test b/src/ZODB/tests/IExternalGC.test index 52983d36f..f2296bea0 100644 --- a/src/ZODB/tests/IExternalGC.test +++ b/src/ZODB/tests/IExternalGC.test @@ -16,6 +16,7 @@ A create_storage function is provided that creates a storage. >>> transaction.commit() >>> oid0 = conn.root()[0]._p_oid >>> oid1 = conn.root()[1]._p_oid + >>> atLive = conn.root()._p_serial >>> del conn.root()[0] >>> del conn.root()[1] >>> transaction.commit() @@ -66,9 +67,10 @@ Now if we try to load data for the objects, we get a POSKeyError: We can still get the data if we load before the time we deleted. - >>> storage.loadBefore(oid0, conn.root()._p_serial) == (p0, s0, tid) + >>> from ZODB.utils import loadAt, z64 + >>> loadAt(storage, oid0, atLive) == (p0, s0) True - >>> storage.loadBefore(oid1, conn.root()._p_serial) == (p1, s1, tid) + >>> loadAt(storage, oid1, atLive) == (p1, s1) True >>> with open(storage.loadBlob(oid1, s1)) as fp: fp.read() 'some data' @@ -92,15 +94,11 @@ gone: ... POSKeyError: ... - >>> storage.loadBefore(oid0, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadAt(storage, oid0, atLive) == (None, z64) + True - >>> storage.loadBefore(oid1, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadAt(storage, oid1, atLive) == (None, z64) + True >>> storage.loadBlob(oid1, s1) # doctest: +ELLIPSIS Traceback (most recent call last): diff --git a/src/ZODB/tests/MVCCMappingStorage.py b/src/ZODB/tests/MVCCMappingStorage.py index e87b0be80..4c76b12a9 100644 --- a/src/ZODB/tests/MVCCMappingStorage.py +++ b/src/ZODB/tests/MVCCMappingStorage.py @@ -46,6 +46,7 @@ def new_instance(self): inst.new_oid = self.new_oid inst.pack = self.pack inst.loadBefore = self.loadBefore + inst.loadAt = self.loadAt inst._ltid = self._ltid inst._main_lock = self._lock return inst diff --git a/src/ZODB/tests/hexstorage.py b/src/ZODB/tests/hexstorage.py index f27c0e3ac..b4f556a7f 100644 --- a/src/ZODB/tests/hexstorage.py +++ b/src/ZODB/tests/hexstorage.py @@ -39,6 +39,13 @@ def __init__(self, base): setattr(self, name, v) zope.interface.directlyProvides(self, zope.interface.providedBy(base)) + if hasattr(base, 'loadAt') and 'loadAt' not in self.copied_methods: + def loadAt(oid, at): + data, serial = self.base.loadAt(oid, at) + if data is not None: + data = unhexlify(data[2:]) + return data, serial + self.loadAt = loadAt def __getattr__(self, name): return getattr(self.base, name) @@ -130,7 +137,7 @@ class ServerHexStorage(HexStorage): """ copied_methods = HexStorage.copied_methods + ( - 'load', 'loadBefore', 'loadSerial', 'store', 'restore', + 'load', 'loadAt', 'loadBefore', 'loadSerial', 'store', 'restore', 'iterator', 'storeBlob', 'restoreBlob', 'record_iternext', ) diff --git a/src/ZODB/tests/testConnection.py b/src/ZODB/tests/testConnection.py index 7d36cca63..a579c98d8 100644 --- a/src/ZODB/tests/testConnection.py +++ b/src/ZODB/tests/testConnection.py @@ -1314,7 +1314,16 @@ def load(self, oid, version=''): raise TypeError('StubStorage does not support versions.') return self._data[oid] + def loadAt(self, oid, at): + try: + data, serial = self._transdata[oid] + except KeyError: + return None, z64 + return data, serial + def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) return self._data[oid] + (None, ) def store(self, oid, serial, p, version, transaction): diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 25d32e217..90d4cb185 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -23,6 +23,7 @@ StorageTestBase, Synchronization, ) +from ZODB.tests.MinPO import MinPO import os if os.environ.get('USE_ZOPE_TESTING_DOCTEST'): @@ -33,7 +34,9 @@ import re import transaction import unittest +import ZODB.Connection import ZODB.DemoStorage +import ZODB.FileStorage import ZODB.tests.hexstorage import ZODB.tests.util import ZODB.utils @@ -264,6 +267,101 @@ def load_before_base_storage_current(): >>> base.close() """ +# additional DemoStorage tests that do not fit into common DemoStorageTests setup. +class DemoStorageTests2(ZODB.tests.util.TestCase): + def checkLoadAfterDelete(self): + """Verify that DemoStorage correctly handles load requests for objects + deleted in read-write part of the storage. + + https://github.com/zopefoundation/ZODB/issues/318 + """ + FileStorage = ZODB.FileStorage.FileStorage + DemoStorage = ZODB.DemoStorage.DemoStorage + TransactionMetaData = ZODB.Connection.TransactionMetaData + + # mkbase prepares base part of the storage. + def mkbase(): # -> zbase + zbase = FileStorage("base.fs") + db = DB(zbase) + conn = db.open() + root = conn.root() + + root['obj'] = obj = MinPO(0) + transaction.commit() + + obj.value += 1 + transaction.commit() + + conn.close() + db.close() + zbase.close() + + zbase = FileStorage("base.fs", read_only=True) + return zbase + + # prepare base + overlay + zbase = mkbase() + zoverlay = FileStorage("overlay.fs") + zdemo = DemoStorage(base=zbase, changes=zoverlay) + + # overlay: modify obj and root + db = DB(zdemo) + conn = db.open() + root = conn.root() + obj = root['obj'] + oid = obj._p_oid + obj.value += 1 + # modify root as well so that there is root revision saved in overlay that points to obj + root['x'] = 1 + transaction.commit() + atLive = obj._p_serial + + # overlay: delete obj from root making it a garbage + del root['obj'] + transaction.commit() + atUnlink = root._p_serial + + # unmount DemoStorage + conn.close() + db.close() + zdemo.close() # closes zbase and zoverlay as well + del zbase, zoverlay + + # simulate GC on base+overlay + zoverlay = FileStorage("overlay.fs") + txn = transaction.get() + txn_meta = TransactionMetaData(txn.user, txn.description, txn.extension) + zoverlay.tpc_begin(txn_meta) + zoverlay.deleteObject(oid, atLive, txn_meta) + zoverlay.tpc_vote(txn_meta) + atGC = zoverlay.tpc_finish(txn_meta) + + # remount base+overlay + zbase = FileStorage("base.fs", read_only=True) + zdemo = ZODB.DemoStorage.DemoStorage(base=zbase, changes=zoverlay) + db = DB(zdemo) + + # verify: + # load(obj, atLive) -> 2 + # load(obj, atUnlink) -> 2 (garbage, but still in DB) + # load(obj, atGC) -> POSKeyError, not 1 from base + def getObjAt(at): + conn = db.open(at=at) + obj = conn.get(oid) + self.assertIsInstance(obj, MinPO) + v = obj.value + conn.close() + return v + + self.assertEqual(getObjAt(atLive), 2) + self.assertEqual(getObjAt(atUnlink), 2) + self.assertRaises(ZODB.POSException.POSKeyError, getObjAt, atGC) + + # end + db.close() + zdemo.close() # closes zbase and zoverlay as well + + def test_suite(): suite = unittest.TestSuite(( doctest.DocTestSuite( @@ -285,4 +383,5 @@ def test_suite(): 'check')) suite.addTest(unittest.makeSuite(DemoStorageWrappedAroundHexMappingStorage, 'check')) + suite.addTest(unittest.makeSuite(DemoStorageTests2, 'check')) return suite diff --git a/src/ZODB/tests/test_storage.py b/src/ZODB/tests/test_storage.py index 6cb47ccb1..ae26a3abe 100644 --- a/src/ZODB/tests/test_storage.py +++ b/src/ZODB/tests/test_storage.py @@ -20,10 +20,11 @@ """ import bisect import unittest +import warnings from ZODB.BaseStorage import BaseStorage from ZODB import POSException -from ZODB.utils import z64 +from ZODB.utils import p64, u64, z64 from ZODB.tests import StorageTestBase from ZODB.tests import BasicStorage, MTStorage, Synchronization @@ -105,6 +106,11 @@ def _finish(self, tid, u, d, e): self._ltid = self._tid def loadBefore(self, the_oid, the_tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) + return self._loadBefore(the_oid, the_tid) + + def _loadBefore(self, the_oid, the_tid): # It's okay if loadBefore() is really expensive, because this # storage is just used for testing. with self._lock: @@ -126,6 +132,17 @@ def loadBefore(self, the_oid, the_tid): return self._index[(the_oid, tid)], tid, end_tid + def loadAt(self, oid, at): + try: + r = self._loadBefore(oid, p64(u64(at)+1)) + except KeyError: + return None, z64 + if r is None: + # not-yet created (deleteObject not supported -> serial=0) + return None, z64 + data, serial, _ = r + return data, serial + def loadSerial(self, oid, serial): return self._index[(oid, serial)] diff --git a/src/ZODB/tests/testmvcc.py b/src/ZODB/tests/testmvcc.py index d8f13c8ca..f58f35cfb 100644 --- a/src/ZODB/tests/testmvcc.py +++ b/src/ZODB/tests/testmvcc.py @@ -35,7 +35,7 @@ ***IMPORTANT***: The MVCC approach has changed since these tests were originally written. The new approach is much simpler because we no longer call load to get the current state of an object. We call -loadBefore instead, having gotten a transaction time at the start of a +loadAt instead, having gotten a transaction time at the start of a transaction. As a result, the rhythm of the tests is a little odd, because we no longer need to probe a complex dance that doesn't exist any more. @@ -290,7 +290,7 @@ Now deactivate "b" in the first connection, and (re)fetch it. The first connection should still see 1, due to MVCC, but to get this old state -TmpStore needs to handle the loadBefore() method. +TmpStore needs to handle the loadAt() or loadBefore() methods. >>> r1["b"]._p_deactivate() @@ -322,7 +322,7 @@ Rather than add all the complexity of ZEO to these tests, the MinimalMemoryStorage has a hook. We'll write a subclass that will -deliver an invalidation when it loads (or loadBefore's) an object. +deliver an invalidation when it loads (or loadAt's) an object. The hook allows us to test the Connection code. >>> class TestStorage(MinimalMemoryStorage): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 1df2460d5..199950b58 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -17,6 +17,7 @@ import sys import time import threading +import warnings from binascii import hexlify, unhexlify from tempfile import mkstemp @@ -382,8 +383,51 @@ def load_current(storage, oid, version=''): some time in the future. """ assert not version - r = storage.loadBefore(oid, maxtid) - if r is None: + data, serial = loadAt(storage, oid, maxtid) + if data is None: raise ZODB.POSException.POSKeyError(oid) - assert r[2] is None - return r[:2] + return data, serial + + +_loadAtWarned = set() # of storage class +def loadAt(storage, oid, at): + """loadAt provides IStorageLoadAt semantic for all storages. + + Storages that do not implement loadAt are served via loadBefore. + """ + load_at = getattr(storage, 'loadAt', None) + if load_at is not None: + return load_at(oid, at) + + # storage does not provide IStorageLoadAt - warn + fall back to loadBefore + if type(storage) not in _loadAtWarned: + # there is potential race around _loadAtWarned access, but due to the + # GIL this race cannot result in that set corruption, and can only lead + # to us emitting the warning twice instead of just once. + # -> do not spend CPU on lock and just ignore it. + warnings.warn( + "FIXME %s does not provide loadAt - emulating it via loadBefore, but ...\n" + "\t... 1) access will be potentially slower, and\n" + "\t... 2) not full semantic of loadAt could be provided.\n" + "\t... this can lead to data corruption.\n" + "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % + type(storage), DeprecationWarning) + _loadAtWarned.add(type(storage)) + + if at == maxtid: + before = at + else: + before = p64(u64(at)+1) + + try: + r = storage.loadBefore(oid, before) + except ZODB.POSException.POSKeyError: + return (None, z64) # object does not exist at all + + if r is None: + # object was removed; however loadBefore does not tell when. + # return serial=0 - this is the "data corruption" case talked about above. + return (None, z64) + + data, serial, next_serial = r + return (data, serial)