diff --git a/semidbm/compat.py b/semidbm/compat.py index 3785353..914a581 100644 --- a/semidbm/compat.py +++ b/semidbm/compat.py @@ -15,9 +15,11 @@ str_type = str -DATA_OPEN_FLAGS = os.O_RDWR | os.O_CREAT | os.O_APPEND +DATA_OPEN_FLAGS = "r+" +DATA_OPEN_FLAGS_READ = "r" if sys.platform.startswith('win'): # On windows we need to specify that we should be # reading the file as a binary file so it doesn't # change any line ending characters. - DATA_OPEN_FLAGS = DATA_OPEN_FLAGS | os.O_BINARY + DATA_OPEN_FLAGS += "b" + DATA_OPEN_FLAGS_READ += "b" diff --git a/semidbm/db.py b/semidbm/db.py index 5530508..62b1f07 100644 --- a/semidbm/db.py +++ b/semidbm/db.py @@ -18,6 +18,8 @@ _open = compat.file_open +SEEK_SET = 0 +SEEK_END = 2 class _SemiDBM(object): """ @@ -26,6 +28,8 @@ class _SemiDBM(object): does not exist it will be created. """ + open_flags = compat.DATA_OPEN_FLAGS + def __init__(self, dbdir, renamer, data_loader=None, verify_checksums=False): self._renamer = renamer @@ -36,7 +40,6 @@ def __init__(self, dbdir, renamer, data_loader=None, self._index = None self._data_fd = None self._verify_checksums = verify_checksums - self._current_offset = 0 self._load_db() def _create_db_dir(self): @@ -46,8 +49,7 @@ def _create_db_dir(self): def _load_db(self): self._create_db_dir() self._index = self._load_index(self._data_filename) - self._data_fd = os.open(self._data_filename, compat.DATA_OPEN_FLAGS) - self._current_offset = os.lseek(self._data_fd, 0, os.SEEK_END) + self._data_fd = _open(self._data_filename, self.open_flags) def _load_index(self, filename): # This method is only used upon instantiation to populate @@ -85,25 +87,22 @@ def _load_index_from_fileobj(self, filename): index[key_name] = (offset, size) return index - def __getitem__(self, key, read=os.read, lseek=os.lseek, - seek_set=os.SEEK_SET, str_type=compat.str_type, - isinstance=isinstance): + def __getitem__(self, key, str_type = compat.str_type, + isinstance = isinstance): if isinstance(key, str_type): key = key.encode('utf-8') offset, size = self._index[key] - lseek(self._data_fd, offset, seek_set) - if not self._verify_checksums: - return read(self._data_fd, size) - else: + self._data_fd.seek(offset, SEEK_SET) + value = self._data_fd.read(size) + if self._verify_checksums: # Checksum is at the end of the value. - data = read(self._data_fd, size + 4) - return self._verify_checksum_data(key, data) + return self._verify_checksum_data(key, value, self._data_fd.read(4)) + return value - def _verify_checksum_data(self, key, data): + def _verify_checksum_data(self, key, value, chk): # key is the bytes of the key, # data is the bytes of the value + 4 byte checksum at the end. - value = data[:-4] - expected = struct.unpack('!I', data[-4:])[0] + expected = struct.unpack('!I', chk)[0] actual = crc32(key) actual = crc32(value, actual) if actual & 0xffffffff != expected: @@ -111,7 +110,7 @@ def _verify_checksum_data(self, key, data): "Corrupt data detected: invalid checksum for key %s" % key) return value - def __setitem__(self, key, value, len=len, crc32=crc32, write=os.write, + def __setitem__(self, key, value, len=len, crc32=crc32, str_type=compat.str_type, pack=struct.pack, isinstance=isinstance): if isinstance(key, str_type): @@ -119,6 +118,7 @@ def __setitem__(self, key, value, len=len, crc32=crc32, write=os.write, if isinstance(value, str_type): value = value.encode('utf-8') # Write the new data out at the end of the file. + self._data_fd.seek(0, SEEK_END) # Format is # 4 bytes 4bytes 4bytes # <keysize><valsize><key><val><keyvalcksum> @@ -130,16 +130,15 @@ def __setitem__(self, key, value, len=len, crc32=crc32, write=os.write, checksum = pack('!I', crc32(keyval) & 0xffffffff) blob = keyval_size + keyval + checksum - write(self._data_fd, blob) - # Update the in memory index. - self._index[key] = (self._current_offset + 8 + key_size, - val_size) - self._current_offset += len(blob) + # Update the in memory index to point to the val. + self._index[key] = self._data_fd.tell() + 8 + key_size, val_size + # Write the blob + self._data_fd.write(blob) def __contains__(self, key): return key in self._index - def __delitem__(self, key, len=len, write=os.write, deleted=_DELETED, + def __delitem__(self, key, len=len, deleted=_DELETED, str_type=compat.str_type, isinstance=isinstance, crc32=crc32, pack=struct.pack): if isinstance(key, str_type): @@ -148,9 +147,8 @@ def __delitem__(self, key, len=len, write=os.write, deleted=_DELETED, crc = pack('!I', crc32(key) & 0xffffffff) blob = key_size + key + crc - write(self._data_fd, blob) + self._data_fd.write(blob) del self._index[key] - self._current_offset += len(blob) def __iter__(self): for key in self._index: @@ -181,7 +179,8 @@ def close(self, compact=False): if compact: self.compact() self.sync() - os.close(self._data_fd) + self._data_fd.close() + self._data_fd = None def sync(self): """Sync the db to disk. @@ -196,7 +195,7 @@ def sync(self): """ # The files are opened unbuffered so we don't technically # need to flush the file objects. - os.fsync(self._data_fd) + self._data_fd.flush() def compact(self): """Compact the db to reduce space. @@ -223,7 +222,8 @@ def compact(self): new_db[key] = self[key] new_db.sync() new_db.close() - os.close(self._data_fd) + self._data_fd.close() + self._data_fd = None self._renamer(new_db._data_filename, self._data_filename) os.rmdir(new_db._dbdir) # The index is already compacted so we don't need to compact it. @@ -231,6 +231,8 @@ def compact(self): class _SemiDBMReadOnly(_SemiDBM): + open_flags = compat.DATA_OPEN_FLAGS_READ + def __delitem__(self, key): self._method_not_allowed('delitem') @@ -247,7 +249,8 @@ def _method_not_allowed(self, method_name): raise DBMError("Can't %s: db opened in read only mode." % method_name) def close(self, compact=False): - os.close(self._data_fd) + self._data_fd.close() + self._data_fd = None class _SemiDBMReadWrite(_SemiDBM):