-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from RamanjaneyuluIdavalapati/master
diskarry stabilized code
- Loading branch information
Showing
9 changed files
with
341 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
language: python | ||
python: | ||
- '2.7' | ||
install: | ||
- pip install . | ||
script: | ||
- python setup.py test | ||
deploy: | ||
- provider: releases | ||
api_key: | ||
secure: KuV+GjVaFNKvhpI3rgBjolmdhtRWbnJSOgy2iDT4+GQMEz+ypXF3XGR8Opx8NDDeFoBzRxcLcfqRo0Og/i06n5IZ/GcqppErJxYGb984qAJVymukm7pUO4+tls10pDrzZH0+4tTp3SNHukUlcUFjk/+bCTrD67uCZsQGCob3aflLBNx+uL+q3TinF/gbLKdf6wLQqVzkye//ZC20zjZWLRQpyQPRAH1CgGKtRETo5BgSq9w4LbGZd0pGc3S3b33wf3MVzfVlepuXHtwEpviXlXYImRX8/giw6SIx/EJN5IZFkeyGFBetdPsN6dCcOiWAlaFAlrsUSb/YtlrNWZOizkUpmzlAmPTgpl/rW1kS2UUxjLMV1w3oaBt8bhRhX97C0SI0gO2cMWO4E2NIqUFG+rz7Y9VBb/ZpWTlaT5odU+paIBYT0ii6m79YYVu53ajyB6e26zN1Mw12fmRlzBTWsZopxVa22P1+zuIEqtN9meMu5KKONuQ7FL3iNphA8RGguj9X6NKVy4PbbO/25fGScy1oTxsAVCDsiq9x3M+tFg8+9g1fJJ1Ry30wq2cqe1L9o3AaEcuoIvBhf2cIj2ZO1NQAFr9/pkr7t4w/HfJsrRGmlK4hLFkNwZUPdufIS/1s/66lHIiaXacM069xz47zpuxNftjjF3DoZX5Ge/wjKn8= | ||
name: diskarray-0.1.1 | ||
tag_name: 0.1.1 | ||
on: | ||
repo: deep-compute/diskarray | ||
- provider: pypi | ||
distributions: "sdist bdist_wheel" | ||
server: https://upload.pypi.org/legacy/ | ||
user: deepcompute | ||
password: | ||
secure: "XScKeAhmF7gGThEjJCtur1RHlGUZ2n3FXukp7619YHmLoUzvszo8Wg+ZSJrdl3soEuSvVQdn2G3ngxAZdKmNhWVYrUKRdKL5iY4WyYHVujkOq+diVqCGHWbSmZJupyscgt1L/H8l+IohC1dnng/ThQuFp7Jbay68lM8LzS20f16JgSL6Xq+jRqqtBU3jALoqf9scnwuXG+Yj51YTQ9DmS37ctlLyzg3GEbICQB1dNaSZ3HtM1LB0/69++rhukzicm1Z9FcNEbdL9U7ohAgrI1+0mj/4xtURIrOwvlKjhjXUxf5S2RMe49xFq4KDRkXL8uwUEvRnskwXH6u0+mkAQRpMYFMygxcmiVhuhhelJD43RuO56o84IWQLAwje/RUeH1huePUNBV63tAXQC1uQ4yoaZs/DehtjDjgIkk/j4xdTnlwaN68icGrnWxZ5QCwZkz16OQ84HolTLGG9X1Fuqs7iYGI9GmBYfSG6FVp6H6E1Cakvc492gAVqIJKln6Y5u8a1SPpHp2jhhqzsMQo/fwqDTc8m59ENX1zxcIgtJjaWrgkJ4U+W4GBosKpevTJCLKzpirsfJ2Al/7E7rTEOpXpw3NNeeyben708/RiPbWVvhE6yZ7Z6XHYzIO+fF9gr0pRiqdL3uL3ASCUlNEBvw5j7adH57foeTf8mZkQqLyrU=" | ||
on: | ||
tags: true | ||
branch: master |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
# DiskArray | ||
|
||
A resizable and readable numpy array on disk. | ||
|
||
This module is built on numpy `memmap` used for accessing and modifying small segments of large files on disk, without reading the entire file into memory. | ||
|
||
This module also supports appending your numpy arrays to disk array at any time. | ||
|
||
## Installation | ||
|
||
> Prerequisites: Python | ||
```bash | ||
$ sudo pip install diskarray | ||
``` | ||
|
||
## Quick Example | ||
|
||
```python | ||
>>> import numpy as np | ||
>>> from diskarray import DiskArray | ||
|
||
>>> data = np.array([[2 , 3, 4], [1, 2, 3]]) | ||
|
||
>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), dtype=np.float32) | ||
|
||
>>> da.extend(data) | ||
|
||
>>> print(da[:]) | ||
``` | ||
|
||
## Usage | ||
|
||
`DiskArray` supports two methods, extend and append. | ||
|
||
`extend` is used to append arrays to disk array. | ||
|
||
`append` is used to append single array at a time. | ||
|
||
### Importing | ||
|
||
#### Using extend | ||
|
||
Example1: | ||
|
||
```python | ||
>>> import numpy as np | ||
>>> from diskarray import DiskArray | ||
|
||
>>> data = np.array([[2 , 3, 4], [1, 2, 3]]) | ||
|
||
# creating object to disk array | ||
>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), capacity=(10, 3), growby=200, dtype=np.float32) | ||
|
||
# extend the data to disk array | ||
>>> da.extend(data) | ||
|
||
# Get the full array | ||
>>> print(da[:]) | ||
|
||
# Get the data which is in first row | ||
>>> print(da[1]) | ||
|
||
# Get the data from first row to third row | ||
>>> print(da[1:3]) | ||
|
||
# Get the data which is in 1st row 1st column | ||
>>> print(da[1][1]) | ||
``` | ||
|
||
- `/tmp/disk.array` is the file which holds disk arrays. | ||
- `shape` is the size of the disk array. | ||
- `capacity` is the total capacity of the disk array. | ||
This is used because when we extend the disk array with arrays that do not fit in `shape`, DiskArray has to create the memmap on the file again, which is a costlier operation. | ||
So we are using `capacity` to directly create disk array with the size of `capacity` | ||
|
||
- `capacity` and `growby` are optional; when they are not given, `capacity` defaults to `shape` and `growby` defaults to `10000`. | ||
|
||
Example2: | ||
|
||
```python | ||
>>> import numpy as np | ||
>>> from diskarray import DiskArray | ||
|
||
>>> dtype = [('token', np.uint32), ('count', np.uint32), ('pmi', np.float32)] | ||
|
||
>>> data = np.array([[(1, 0, 0.), (0, 2, 0.), (0, 2, 0.)], [(1, 0, 0.), (0, 2, 0.), (0, 2, 0.)]], dtype=dtype) | ||
|
||
>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), capacity=(10, 3), dtype=dtype) | ||
|
||
>>> da.extend(data) | ||
|
||
# Get the full array | ||
>>> print(da[:]) | ||
|
||
# Get the count values at 1th row | ||
>>> print(da[1]['count']) | ||
|
||
# Get the token value at 1th row 2nd column | ||
>>> print(da[1][2]['token']) | ||
|
||
# Modify the pmi value at 1th row 2nd column | ||
>>> da[1][2]['pmi'] = 10.0 | ||
``` | ||
|
||
#### Using append | ||
|
||
Example: | ||
|
||
```python | ||
>>> import numpy as np | ||
>>> from diskarray import DiskArray | ||
|
||
>>> data = np.array([2, 3, 4]) | ||
|
||
# creating object to disk array | ||
>>> da = DiskArray('/tmp/disk.array', shape=(0, 3), capacity=(10, 3), growby=200, dtype=np.float32) | ||
|
||
# append 1 dimensional array to disk array | ||
>>> da.append(data) | ||
>>> da.append(data + 1) | ||
|
||
# Get the full array | ||
>>> print(da[:]) | ||
|
||
# Get the data which is in first row | ||
>>> print(da[1]) | ||
|
||
# Get the data from first row to third row | ||
>>> print(da[1:3]) | ||
|
||
# Get the data which is in 1st row 1st column | ||
>>> print(da[1][1]) | ||
``` | ||
|
||
`growby` is used to increase the size of the disk array when it reaches its maximum limit. | ||
|
||
### Interactive console | ||
|
||
```bash | ||
# diskarray provides command to directly interact with it | ||
|
||
$ diskarray interact <fpath> <shape> <dtype> --capacity <capacity> --growby <growby> --mode <mode> | ||
|
||
# <fpath> is the input file which is used to store disk arrays. | ||
# <shape> is the size of the disk array. | ||
# <dtype> is the data type of the disk array. | ||
# <capacity> is the total capacity of the disk array. | ||
# <growby> is used to increase the size of the disk array when it reaches its maximum limit. | ||
# <mode> is to open the disk array in that mode. | ||
``` | ||
|
||
Example: | ||
|
||
```bash | ||
$ diskarray interact /tmp/test '(0, 3)' np.float32 --capacity '(10, 3)' --growby 5 --mode r+ | ||
DiskArray Console | ||
>>> import numpy as np | ||
>>> da.append(np.array([1, 2, 3])) | ||
``` | ||
|
||
## Running Tests | ||
|
||
``` | ||
$ python setup.py test | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .command import main | ||
from .diskarray import DiskArray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import code | ||
|
||
from basescript import BaseScript | ||
import numpy as np | ||
|
||
from .diskarray import DiskArray | ||
|
||
class DiskArrayCommand(BaseScript):
    """Command-line tool for DiskArray, built on BaseScript.

    Adds an ``interact`` subcommand that opens a DiskArray from the given
    command-line arguments and starts an interactive Python console in
    which the array is available as ``da``.
    """

    DESC = 'DiskArray command-line tool'

    # Defaults for the optional --capacity, --growby and --mode flags.
    DEFAULT_CAPACITY = None
    DEFAULT_GROWBY = 10000
    DEFAULT_MODE = 'r+'

    def interact(self):
        """Open the requested DiskArray and start a console around it.

        Reads fpath, shape, dtype, capacity, growby and mode from
        ``self.args``. The console namespace contains a single name,
        ``da``, bound to the opened DiskArray.
        """
        # NOTE(review): shape, capacity and dtype arrive as strings and are
        # turned into Python objects with eval(), so any expression typed on
        # the command line is executed. That is tolerable for a local,
        # interactive tool; if these values can ever come from an untrusted
        # source, switch shape/capacity to ast.literal_eval and map dtype
        # names explicitly instead.
        if self.args.capacity:
            capacity = eval(self.args.capacity)
        else:
            # Flag not given: pass the (falsy) value through unchanged.
            capacity = self.args.capacity

        fpath = self.args.fpath
        shape = eval(self.args.shape)
        growby = self.args.growby
        dtype = eval(self.args.dtype)
        mode = self.args.mode

        # Named disk_array (not "interact") so the local does not shadow
        # this method's own name.
        disk_array = DiskArray(fpath=fpath,
                               shape=shape,
                               capacity=capacity,
                               growby=growby,
                               dtype=dtype,
                               mode=mode)

        namespace = dict(da=disk_array)
        code.interact("DiskArray Console", local=namespace)

    def define_subcommands(self, subcommands):
        """Register the ``interact`` subcommand and its arguments.

        Calls the base-class implementation first, then adds the
        ``interact`` parser with three positional arguments (fpath, shape,
        dtype) and three optional flags (--capacity, --growby, --mode).
        """
        super(DiskArrayCommand, self).define_subcommands(subcommands)

        interact_cmd = subcommands.add_parser('interact',
                                              help='DiskArray Console')
        interact_cmd.set_defaults(func=self.interact)

        # Help texts are built from adjacent string literals with explicit
        # spaces; a backslash continuation inside a literal would embed the
        # next line's indentation (or no separator at all) in the message.
        interact_cmd.add_argument(
            'fpath',
            help='Input file which is used to store disk arrays. '
                 'eg: /tmp/disk.array')
        interact_cmd.add_argument(
            'shape',
            help="shape is the size of the disk array. "
                 "eg: '(0, 3)'")
        interact_cmd.add_argument(
            'dtype',
            help='data type of the disk array. '
                 'eg: np.float32')
        interact_cmd.add_argument(
            '-c', '--capacity',
            default=self.DEFAULT_CAPACITY, type=str,
            help="capacity is the total capacity of the disk array. "
                 "This is optional and default is shape value. "
                 "eg: --capacity '(10, 3)'")
        interact_cmd.add_argument(
            '-g', '--growby',
            default=self.DEFAULT_GROWBY, type=int,
            help='growby is used to increase the size of '
                 'the disk array when it reaches its maximum limit. '
                 'This is optional and default is 10000. '
                 'eg: --growby 200')
        interact_cmd.add_argument(
            '-m', '--mode',
            default=self.DEFAULT_MODE, type=str,
            help='mode is to open the disk array in that mode. '
                 'Example modes are r+, r, w+ and c. '
                 'This is optional and default is r+')
|
||
def main():
    """Console entry point: build the DiskArray command and start it."""
    command = DiskArrayCommand()
    command.start()


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.