Skip to content

Commit

Permalink
Add Python 3 support.
Browse files Browse the repository at this point in the history
  • Loading branch information
panta committed Aug 15, 2017
1 parent 95953a4 commit d6fb61f
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 69 deletions.
96 changes: 78 additions & 18 deletions src/pyrabin.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,16 @@

#define READ_BUF_SIZE 1048576

static PyObject* RabinError;
/* Per-module state: the objects the module-level functions need at runtime. */
struct module_state {
/* Exception class created as "rabin.error" during module initialization;
 * the module-level functions raise it when they fail. */
PyObject *RabinError;
};

/*
 * GETSTATE(m) evaluates to a pointer to the module_state for module object `m`.
 * The Python 3 build fetches it from the module via PyModule_GetState(); the
 * Python 2 build uses one file-scope instance and ignores `m`.
 */
#if PY_MAJOR_VERSION < 3
static struct module_state _state;
#define GETSTATE(m) (&_state)
#else
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
#endif

extern PyTypeObject RabinType;

Expand Down Expand Up @@ -84,7 +93,8 @@ static PyObject* set_average_block_size(PyObject* self, PyObject* args)
}
if (prev < rabin_polynomial_min_block_size ||
prev > rabin_polynomial_max_block_size) {
PyErr_SetString(RabinError,
struct module_state *st = GETSTATE(self);
PyErr_SetString(st->RabinError,
"average block size should between min and max block size");
return NULL;
}
Expand Down Expand Up @@ -119,7 +129,8 @@ static PyObject* get_file_fingerprints(PyObject* self, PyObject* args)

FILE* fp = fopen(filename, "rb");
if (!fp) {
return PyErr_SetFromErrnoWithFilename(RabinError, filename);
struct module_state *st = GETSTATE(self);
return PyErr_SetFromErrnoWithFilename(st->RabinError, filename);
}

struct rabin_polynomial* head = get_file_rabin_polys(fp);
Expand All @@ -136,19 +147,20 @@ static PyObject* get_file_fingerprints(PyObject* self, PyObject* args)

static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args)
{
struct module_state *st = GETSTATE(self);
const char *filename;
if (!PyArg_ParseTuple(args, "s", &filename)) {
return NULL;
}

FILE* fp = fopen(filename, "rb");
if (!fp) {
return PyErr_SetFromErrnoWithFilename(RabinError, filename);
return PyErr_SetFromErrnoWithFilename(st->RabinError, filename);
}

struct rabin_polynomial* head = get_file_rabin_polys(fp);
if (head == NULL) {
PyErr_SetString(RabinError, "get_file_rabin_polys()");
PyErr_SetString(st->RabinError, "get_file_rabin_polys()");
return NULL;
}

Expand All @@ -171,16 +183,16 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args)
while (curr) {
if ((offset = fseek(fp, curr->start, SEEK_SET) == -1)) {
fclose(fp);
return PyErr_SetFromErrnoWithFilename(RabinError, filename);
return PyErr_SetFromErrnoWithFilename(st->RabinError, filename);
}

/* Save chunk to a temporary file */
snprintf(outfile, BUFSIZ, ".%x.tmp", curr->polynomial);
snprintf(outfile, BUFSIZ, ".%llx.tmp", (unsigned long long) curr->polynomial);
SHA1_Init(&ctx);
FILE* ofp = fopen(outfile, "wb");
if (!ofp) {
fclose(fp);
return PyErr_SetFromErrnoWithFilename(RabinError, filename);
return PyErr_SetFromErrnoWithFilename(st->RabinError, filename);
}

size_t remain_read = curr->length;
Expand All @@ -198,7 +210,7 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args)
if (bytes_written == 0 && ferror(ofp)) {
fclose(fp);
fclose(ofp);
return PyErr_SetFromErrnoWithFilename(RabinError, outfile);
return PyErr_SetFromErrnoWithFilename(st->RabinError, outfile);
}
total_written += bytes_written;
} while (total_written != bytes_read);
Expand All @@ -207,7 +219,7 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args)
fclose(ofp);

/* Rename chunk using SHA-1 sum as filename */
SHA1_Final(digest, &ctx);
SHA1_Final((unsigned char *) digest, &ctx);
to_hex_digest(digest, hex_digest);
strncat(hex_digest, ".blk", SHA_DIGEST_LENGTH * 2 + 5);
rename(outfile, hex_digest);
Expand Down Expand Up @@ -247,25 +259,73 @@ static PyMethodDef PyRabinMethods[] = {
{NULL, NULL, 0, NULL}
};

#if PY_MAJOR_VERSION >= 3

PyMODINIT_FUNC initrabin(void) {
PyObject* m = Py_InitModule("rabin", PyRabinMethods);
if (m == NULL) {
return;
}
/*
 * Traverse hook registered in the module definition: hands the exception
 * object stored in the module state to the supplied visit callback.
 * Returns 0, unless Py_VISIT returns early with the callback's result.
 */
static int rabin_traverse(PyObject *m, visitproc visit, void *arg)
{
    struct module_state *state = GETSTATE(m);

    /* Py_VISIT expects the enclosing parameters to be named `visit` and `arg`. */
    Py_VISIT(state->RabinError);
    return 0;
}

/*
 * Clear hook registered in the module definition: releases the exception
 * object stored in the module state and resets the slot via Py_CLEAR.
 * Always returns 0.
 */
static int rabin_clear(PyObject *m)
{
    struct module_state *state = GETSTATE(m);

    Py_CLEAR(state->RabinError);
    return 0;
}


static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"rabin",
NULL,
sizeof(struct module_state),
PyRabinMethods,
NULL,
rabin_traverse,
rabin_clear,
NULL
};

#define INITERROR return NULL

PyMODINIT_FUNC
PyInit_rabin(void)

#else
#define INITERROR return

void
initrabin(void)
#endif
{
#if PY_MAJOR_VERSION >= 3
PyObject *module = PyModule_Create(&moduledef);
#else
PyObject *module = Py_InitModule("rabin", PyRabinMethods);
#endif
if (module == NULL)
INITERROR;

struct module_state *st = GETSTATE(module);

// Initialize defaults
initialize_rabin_polynomial_defaults();

// Initialize rabin.Rabin
if (PyType_Ready(&RabinType) < 0) {
fprintf(stderr, "Invalid PyTypeObject `RabinType'\n");
return;
INITERROR;
}

Py_INCREF(&RabinType);
PyModule_AddObject(m, "Rabin", (PyObject*)&RabinType);
PyModule_AddObject(module, "Rabin", (PyObject*)&RabinType);

// Initialize RabinError
RabinError = PyErr_NewException("rabin.error", NULL, NULL);
st->RabinError = PyErr_NewException("rabin.error", NULL, NULL);
if (st->RabinError == NULL) {
Py_DECREF(module);
INITERROR;
}

#if PY_MAJOR_VERSION >= 3
return module;
#endif
}
4 changes: 4 additions & 0 deletions src/rabin.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,12 @@ static PyMethodDef Rabin_methods[] = {
};

PyTypeObject RabinType = {
#if PY_MAJOR_VERSION >= 3
PyVarObject_HEAD_INIT(NULL, 0)
#else
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
#endif
"rabin.Rabin", /*tp_name*/
sizeof(Rabin), /*tp_basicsize*/
0, /*tp_itemsize*/
Expand Down
12 changes: 6 additions & 6 deletions test/test_Rabin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

TARGET = 'test.bin'
os.system("dd if=/dev/urandom of=%s bs=1024 count=100" % TARGET)
random.seed(open(TARGET, 'r').read(1024))
random.seed(open(TARGET, 'rb').read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
Expand All @@ -21,7 +21,7 @@ def block_reached(start, length, fingerprint):
r = Rabin()
r.register(block_reached)

with open(TARGET, 'r') as f:
with open(TARGET, 'rb') as f:
while True:
size = random.randint(1,os.path.getsize(TARGET))
data = f.read(size)
Expand All @@ -43,12 +43,12 @@ def block_reached(start, length, fingerprint):
assert gl == pl == rl
assert gp == pp == rp
except:
print 'gold ', gold[i]
print 'partial', partial[i]
print 'reached', reached[i]
print('gold ', gold[i])
print('partial', partial[i])
print('reached', reached[i])
raise

assert partial == gold == reached

os.unlink(TARGET)
print 'passed'
print('passed')
12 changes: 6 additions & 6 deletions test/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

TARGET = 'test.bin'
os.system("dd if=/dev/urandom of=%s bs=1024k count=10" % TARGET)
random.seed(open(TARGET, 'r').read(1024))
random.seed(open(TARGET, 'rb').read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
Expand All @@ -16,9 +16,9 @@
r = Rabin()

before = get_file_fingerprints(TARGET)
fh = open(TARGET, 'r+')
fh = open(TARGET, 'rb+')
fh.seek(1024*1024*5)
fh.write('x')
fh.write(b'x')
fh.close()
after = get_file_fingerprints(TARGET)

Expand All @@ -33,11 +33,11 @@
assert bl == al
assert bp == ap
except:
print 'after', after[i]
print 'before', before[i]
print('after', after[i])
print('before', before[i])
diffcount += 1

assert diffcount == 1, diffcount

os.unlink(TARGET)
print 'passed'
print('passed')
14 changes: 7 additions & 7 deletions test/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % (
TARGET, Mb, filesizeM))
random.seed(open(TARGET, 'r').read(1024))
random.seed(open(TARGET, 'rb').read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
Expand All @@ -21,10 +21,10 @@
r = Rabin()

before = get_file_fingerprints(TARGET)
f = open(TARGET, 'r+')
data = f.read(filesizeM/2 * Mb)
f = open(TARGET, 'rb+')
data = f.read(int(filesizeM/2 * Mb))
r.update(data)
r.update('x')
r.update(b'x')
data = f.read()
r.update(data)
after = r.fingerprints()
Expand All @@ -40,11 +40,11 @@
assert bl == al
assert bp == ap
except:
print 'before', before[i]
print 'after', after[i]
print('before', before[i])
print('after', after[i])
diffcount += 1

assert diffcount < 10, diffcount

os.unlink(TARGET)
print 'passed'
print('passed')
34 changes: 20 additions & 14 deletions test/test_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,49 +13,55 @@
else:
stream_count = 10
seed = random.random()
print 'seed', seed
print('seed', seed)

stream_bs = 1024 * 1024
stream_len = stream_bs * stream_count
max_mem = 1024 * 1024 * 20

resource.setrlimit(resource.RLIMIT_AS, (max_mem,-1))
# print resource.getrlimit(resource.RLIMIT_AS)
# print(resource.getrlimit(resource.RLIMIT_AS))

random.seed(seed)

max_blocksize = random.randint(512, max_mem/10)

set_min_block_size(max_blocksize/10)
set_min_block_size(int(max_blocksize/10))
set_max_block_size(max_blocksize)
set_average_block_size(max_blocksize/5)
set_average_block_size(int(max_blocksize/5))

reached = []
def block_reached(start, length, fingerprint):
# print '(%s, %s, %s)' % (start, length, fingerprint)
# print('(%s, %s, %s)' % (start, length, fingerprint))
reached.append((start, length, fingerprint))


r = Rabin()
r.register(block_reached)

from guppy import hpy; hp=hpy()
try:
from guppy import hpy; hp=hpy()
except:
pass

total = 0
while total < stream_len:
size = random.randint(1,max_blocksize*2)
size = min(size, stream_len - total)
# print size,
# print(size),
data = ''
for i in xrange(size):
for i in range(size):
data += chr(random.randrange(0,256))
r.update(data)
total += len(data)
print total
print hp.heap()
print(total)
try:
print(hp.heap())
except:
pass

partial = r.fingerprints()
print 'partial done'
print('partial done')

assert len(partial) == len(reached)

Expand All @@ -67,10 +73,10 @@ def block_reached(start, length, fingerprint):
assert pl == rl
assert pp == rp
except:
print 'partial', partial[i]
print 'reached', reached[i]
print('partial', partial[i])
print('reached', reached[i])
raise

assert partial == reached

print 'passed'
print('passed')
Loading

0 comments on commit d6fb61f

Please sign in to comment.