Skip to content

Commit

Permalink
strarray
Browse files Browse the repository at this point in the history
  • Loading branch information
RamanjaneyuluIdavalapati committed Jun 17, 2018
1 parent 72b3f56 commit d1822fb
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 11 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ deploy:
- provider: releases
api_key:
secure: KuV+GjVaFNKvhpI3rgBjolmdhtRWbnJSOgy2iDT4+GQMEz+ypXF3XGR8Opx8NDDeFoBzRxcLcfqRo0Og/i06n5IZ/GcqppErJxYGb984qAJVymukm7pUO4+tls10pDrzZH0+4tTp3SNHukUlcUFjk/+bCTrD67uCZsQGCob3aflLBNx+uL+q3TinF/gbLKdf6wLQqVzkye//ZC20zjZWLRQpyQPRAH1CgGKtRETo5BgSq9w4LbGZd0pGc3S3b33wf3MVzfVlepuXHtwEpviXlXYImRX8/giw6SIx/EJN5IZFkeyGFBetdPsN6dCcOiWAlaFAlrsUSb/YtlrNWZOizkUpmzlAmPTgpl/rW1kS2UUxjLMV1w3oaBt8bhRhX97C0SI0gO2cMWO4E2NIqUFG+rz7Y9VBb/ZpWTlaT5odU+paIBYT0ii6m79YYVu53ajyB6e26zN1Mw12fmRlzBTWsZopxVa22P1+zuIEqtN9meMu5KKONuQ7FL3iNphA8RGguj9X6NKVy4PbbO/25fGScy1oTxsAVCDsiq9x3M+tFg8+9g1fJJ1Ry30wq2cqe1L9o3AaEcuoIvBhf2cIj2ZO1NQAFr9/pkr7t4w/HfJsrRGmlK4hLFkNwZUPdufIS/1s/66lHIiaXacM069xz47zpuxNftjjF3DoZX5Ge/wjKn8=
name: diskarray-0.1.6
tag_name: 0.1.6
name: diskarray-0.1.7
tag_name: 0.1.7
on:
repo: deep-compute/diskarray
- provider: pypi
Expand Down
1 change: 1 addition & 0 deletions diskarray/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .command import main
from .diskarray import DiskArray
from .vararray import DiskVarArray
from .strarray import DiskStringArray
32 changes: 28 additions & 4 deletions diskarray/diskarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@
from .exception import AppendNotSupported

class DiskArray(object):
'''
Stores binary data on disk as a memory mapped file
using numpy.memmap. Allows for growing the disk data
by appending and extending.
Links:
* https://en.wikipedia.org/wiki/Memory-mapped_file
# FIXME:
1. Explain capacity and actual shape
2. Explain growby
3. Explain not having to specify shape for 1d arrays
4. Explain using structured arrays
5. Why memory mapping? What does it provide?
6. Why not use np.save and np.load?
'''
GROWBY = 10000

def __init__(self, fpath, dtype, mode='r+', shape=None,
Expand Down Expand Up @@ -69,10 +85,13 @@ def _shape_bytes(self, shape, dtype_bytes):

def _truncate_if_needed(self):
    '''
    Create the backing file if it is missing and set its size to the
    number of bytes that `_shape_bytes` reports for `self._shape`
    and the item size of `self._dtype`. Records `self._shape` as
    `self._capacity_shape` and then calls `self._create_ndarray()`.
    '''
    fd = os.open(self._fpath, os.O_RDWR|os.O_CREAT)
    try:
        dtype_bytes = np.dtype(self._dtype).itemsize
        nbytes = self._shape_bytes(self._shape, dtype_bytes)
        os.ftruncate(fd, nbytes)
        self._capacity_shape = self._shape
    finally:
        # Release the descriptor on every path, including failures
        # while computing the size or resizing the file.
        os.close(fd)
    self._create_ndarray()

@property
Expand Down Expand Up @@ -184,6 +203,11 @@ def grow(self, n):
# FIXME: code
pass

def close(self):
    '''
    Close the memory map behind `self.data` and remove the `data`
    and `_fpath` attributes from this object.

    Calling close() again after a successful close is a no-op
    instead of failing on the already-removed attributes.
    '''
    data = getattr(self, 'data', None)
    if data is None:
        # Already closed (or never opened): nothing to release.
        return

    data._mmap.close()
    del self.data
    if hasattr(self, '_fpath'):
        del self._fpath

def truncate(self, n):
    # Placeholder: no behaviour is implemented yet, so calling this
    # does nothing and returns None. `n` is currently unused.
    # FIXME: code
    pass
Expand Down
26 changes: 26 additions & 0 deletions diskarray/strarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from logging import Logger

import numpy as np

from .vararray import DiskVarArray

class DiskStringArray(DiskVarArray):
    '''
    Variable-length array of byte strings kept on disk.

    Built on DiskVarArray with `dtype=np.uint8` for the data and
    `dtype_index=np.uint64` for the index. Items are converted to
    uint8 arrays when stored and returned as raw bytes when read.
    '''

    # Index to word
    def __init__(self, dpath, mode='r+',
                 growby=DiskVarArray.GROWBY,
                 log=Logger):
        '''
        All arguments are passed straight through to DiskVarArray;
        only the data and index dtypes are fixed by this class.
        '''
        super(DiskStringArray, self).__init__(
            dpath,
            dtype=np.uint8, dtype_index=np.uint64,
            mode=mode, growby=growby, log=log)

    @staticmethod
    def _as_uint8(v):
        # Shared conversion used by append() and extend(): every element
        # of `v` becomes one uint8 value.
        return np.array(list(v), dtype=np.uint8)

    def __getitem__(self, idx):
        '''
        Return the raw bytes of whatever DiskVarArray yields for `idx`.
        '''
        data = super(DiskStringArray, self).__getitem__(idx)
        # tobytes() is the supported spelling; tostring() is a deprecated
        # alias with the same result and is absent from newer NumPy.
        return data.tobytes()

    def append(self, v):
        '''
        Store one item. `v` is iterated and each element is converted
        to uint8 before being handed to DiskVarArray.append.
        '''
        return super(DiskStringArray, self).append(self._as_uint8(v))

    def extend(self, v):
        '''
        Store several items. Each element of `v` is converted the same
        way as in append() and the resulting list is handed to
        DiskVarArray.extend.
        '''
        arrays = [self._as_uint8(x) for x in v]
        return super(DiskStringArray, self).extend(arrays)
17 changes: 13 additions & 4 deletions diskarray/vararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def num_lists(self):
'''
return len(self.index)

def __len__(self):
    # Lets len(obj) report the same value as self.num_lists.
    return self.num_lists

def append(self, v):
'''
>>> d = DiskVarArray('/tmp/test3', dtype='uint32')
Expand All @@ -124,10 +127,12 @@ def append(self, v):
self.data.extend(v)

def extend(self, v):
    '''
    Append every sequence in `v` in one bulk operation.

    One index entry is written per sequence, followed by a single
    write of the concatenated values to the data array.

    NOTE(review): assumes `self.index` holds the start offset of each
    list inside `self.data`, and that `len(self.data)` is the number
    of values already stored - confirm against append()/__getitem__.
    '''
    # Materialise once: `v` is traversed twice below (lengths, values).
    v = list(v)
    if not v:
        # Nothing to add; avoids writing a stray index entry and the
        # ValueError np.concatenate raises for an empty list.
        return

    # Offsets must continue from the values already stored, otherwise
    # a second extend() would point back at the start of the data.
    base = len(self.data)
    ends = np.cumsum([len(x) for x in v])

    self.index.append(base)
    self.index.extend(base + ends[:-1])

    vals = np.concatenate(v)
    self.data.extend(vals)

def destroy(self):
'''
Expand All @@ -144,3 +149,7 @@ def destroy(self):

self.index.destroy()
self.index = None

def close(self):
    '''
    Close both underlying arrays: `self.data` first, then `self.index`.

    An error raised while closing the data array still propagates,
    but the index array is closed before it does.
    '''
    try:
        self.data.close()
    finally:
        # Do not leave the index open when closing the data fails.
        self.index.close()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

version = '0.1.6'
version = '0.1.7'
setup(
name="diskarray",
version=version,
Expand Down

0 comments on commit d1822fb

Please sign in to comment.