Skip to content

Commit

Permalink
touch diskarray file with given shape when object is created
Browse files Browse the repository at this point in the history
  • Loading branch information
RamanjaneyuluIdavalapati committed May 12, 2018
1 parent e337cba commit 2fad1bc
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 55 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ Example2:
>>> import numpy as np
>>> from diskarray import DiskArray

>>> dtype = [('token', np.uint32), ('count', np.uint32), ('pmi', np.float32)]
>>> dtype = [('token', np.uint32), ('count', np.uint32), ('vec', np.float32)]

>>> data = np.array([[(1, 0, 0.), (0, 2, 0.), (0, 2, 0.)], [(1, 0, 0.), (0, 2, 0.), (0, 2, 0.)]], dtype=dtype)

>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), capacity=(10, 3), dtype=np.float32)
>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), capacity=(10, 3), dtype=dtype)

>>> da.extend(data)

Expand All @@ -99,8 +99,8 @@ Example2:
# Get the token value at row 1, column 2 (zero-based indices)
>>> print(da[1][2]['token'])

# Modify the pmi value at 1th row 2nd column
>>> da[1][2]['pmi'] = 10.0
# Modify the vec value at row 1, column 2 (zero-based indices)
>>> da[1][2]['vec'] = 10.0
```

#### Using append
Expand Down
50 changes: 25 additions & 25 deletions diskarray/diskarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ def __init__(self, fpath, dtype, mode='r+', shape=None,
>>> import numpy as np
>>> da = DiskArray('/tmp/test.array', shape=(0, 3), dtype=np.float32)
>>> print(da[:])
[]
[[0. 0. 0.]]
'''

itemsize = np.dtype(dtype).itemsize

if not os.path.exists(fpath):
if not shape:
shape = (0,)
# FIXME: what if capacity is defined?
# FIXME: what if capacity is defined?
if not capacity:
capacity = tuple([max(x, 1) for x in shape])

Expand Down Expand Up @@ -110,9 +110,9 @@ def append(self, v):
>>> data = np.array([[2,3,4], [1, 2, 3]])
>>> da.append(data[0])
>>> print(da[:])
[[ 2. 3. 4.]
[ 0. 0. 0.]
[ 0. 0. 0.]]
[[2. 3. 4.]
[0. 0. 0.]
[0. 0. 0.]]
'''

# FIXME: for now we only support
Expand Down Expand Up @@ -142,29 +142,29 @@ def extend(self, v):
>>> import numpy as np
>>> da = DiskArray('/tmp/test.array', shape=(0, 3), capacity=(10, 3), dtype=np.float32)
>>> print(da[:])
[[ 2. 3. 4.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]]
[[2. 3. 4.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
>>> data = np.array([[2,3,4], [1, 2, 3]])
>>> da.extend(data)
>>> print(da[:])
[[ 2. 3. 4.]
[ 1. 2. 3.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]
[ 0. 0. 0.]]
[[2. 3. 4.]
[1. 2. 3.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
>>> os.remove('/tmp/test.array')
'''

Expand Down
54 changes: 31 additions & 23 deletions diskarray/vararray.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import shutil
from logging import Logger

import numpy as np
Expand All @@ -14,7 +15,7 @@ def __init__(self, dpath, dtype, dtype_index=np.uint64,
'''
>>> import numpy as np
>>> from diskarray import DiskVarArray
>>> d = DiskVarArray('/tmp/test', dtype='uint32')
>>> d = DiskVarArray('/tmp/test1', dtype='uint32')
>>> d # doctest:+ELLIPSIS
<diskarray.vararray.DiskVarArray object at 0x...>
'''
Expand All @@ -30,53 +31,56 @@ def __init__(self, dpath, dtype, dtype_index=np.uint64,
os.makedirs(dpath)

self._data_fpath = os.path.join(dpath, 'data')
self._data = DiskArray(self._data_fpath, dtype=dtype,
self.data = DiskArray(self._data_fpath, dtype=dtype,
mode=mode, growby=growby, log=log)

self._index_fpath = os.path.join(dpath, 'index')
self._index = DiskArray(self._index_fpath, dtype=dtype_index,
self.index = DiskArray(self._index_fpath, dtype=dtype_index,
mode=mode, growby=growby, log=log)

def flush(self):
self._data.flush()
self._index.flush()
self.data.flush()
self.index.flush()

@property
def dtype(self):
'''
>>> import numpy as np
>>> d = DiskVarArray('/tmp/test', dtype='uint32')
>>> d = DiskVarArray('/tmp/test1', dtype='uint32')
>>> d.dtype
'uint32'
>>> shutil.rmtree('/tmp/test1', ignore_errors=True)
'''
return self._dtype

@property
def dtype_index(self):
'''
>>> import numpy as np
>>> d = DiskVarArray('/tmp/test', dtype='uint32')
>>> d = DiskVarArray('/tmp/test1', dtype='uint32')
>>> d.dtype_index
<class 'numpy.uint64'>
>>> shutil.rmtree('/tmp/test1', ignore_errors=True)
'''
return self._dtype_index

def __getitem__(self, idx):
'''
>>> import numpy as np
>>> d = DiskVarArray('/tmp/test', dtype='uint32')
>>> d = DiskVarArray('/tmp/test1', dtype='uint32')
>>> d.append([1, 2, 3, 4])
>>> d.__getitem__(0)
memmap([1, 2, 3, 4], dtype=uint32)
>>> shutil.rmtree('/tmp/test1', ignore_errors=True)
'''
sindex = self._index[idx]
sindex = self.index[idx]

if idx == (len(self._index) - 1):
eindex = len(self._data)
if idx == (len(self.index) - 1):
eindex = len(self.data)
else:
eindex = self._index[idx+1]
eindex = self.index[idx+1]

return self._data[sindex:eindex]
return self.data[sindex:eindex]

@property
def num_elements(self):
Expand All @@ -86,8 +90,9 @@ def num_elements(self):
>>> d.append([1, 2, 3, 4])
>>> d.num_elements
4
>>> shutil.rmtree('/tmp/test1', ignore_errors=True)
'''
return len(self._data)
return len(self.data)

@property
def num_lists(self):
Expand All @@ -100,8 +105,9 @@ def num_lists(self):
>>> d.append([5, 6, 7, 8])
>>> d.num_lists
2
>>> shutil.rmtree('/tmp/test2', ignore_errors=True)
'''
return len(self._index)
return len(self.index)

def append(self, v):
'''
Expand All @@ -112,9 +118,10 @@ def append(self, v):
>>> d.append([5, 6, 7, 8])
>>> d.__getitem__(1)
memmap([5, 6, 7, 8], dtype=uint32)
>>> shutil.rmtree('/tmp/test3', ignore_errors=True)
'''
self._index.append(len(self._data))
self._data.extend(v)
self.index.append(len(self.data))
self.data.extend(v)

def extend(self, v):
# FIXME: assert v properties
Expand All @@ -125,14 +132,15 @@ def extend(self, v):
def destroy(self):
'''
>>> import numpy as np
>>> d = DiskVarArray('/tmp/test5', dtype='uint32')
>>> d = DiskVarArray('/tmp/test4', dtype='uint32')
>>> d.append([1, 2, 3, 4])
>>> d.destroy # doctest:+ELLIPSIS
>>> d.destroy # doctest:+ELLIPSIS
<bound method DiskVarArray.destroy of <diskarray.vararray.DiskVarArray object at 0x...>>
>>> shutil.rmtree('/tmp/test4', ignore_errors=True)
'''

self._data.destroy()
self._data = None
self.data.destroy()
self.data = None

self._index.destroy()
self._index = None
self.index.destroy()
self.index = None
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

version = '0.1.5'
version = '0.1.6'
setup(
name="diskarray",
version=version,
Expand All @@ -12,8 +12,8 @@
download_url="https://github.com/deep-compute/diskarray/tarball/%s" % version,
license='MIT License',
install_requires=[
'numpy==1.13.1',
'basescript==0.2.1'
'numpy==1.14.3',
'basescript==0.2.5'
],
package_dir={'diskarray': 'diskarray'},
packages=find_packages('.'),
Expand Down

0 comments on commit 2fad1bc

Please sign in to comment.