From d6fb61f3672bee3652f117218b902edd460e0aeb Mon Sep 17 00:00:00 2001 From: Marco Pantaleoni Date: Tue, 15 Aug 2017 13:39:24 +0200 Subject: [PATCH 1/2] Add Python 3 support. --- src/pyrabin.c | 96 +++++++++++++++++++++++++++++++-------- src/rabin.c | 4 ++ test/test_Rabin.py | 12 ++--- test/test_delta.py | 12 ++--- test/test_insert.py | 14 +++--- test/test_streaming.py | 34 ++++++++------ test/test_swap.py | 14 +++--- test/test_swap_twofile.py | 22 ++++----- 8 files changed, 139 insertions(+), 69 deletions(-) diff --git a/src/pyrabin.c b/src/pyrabin.c index ab95b09..7a7d279 100644 --- a/src/pyrabin.c +++ b/src/pyrabin.c @@ -30,7 +30,16 @@ #define READ_BUF_SIZE 1048576 -static PyObject* RabinError; +struct module_state { + PyObject *RabinError; +}; + +#if PY_MAJOR_VERSION >= 3 +#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) +#else +#define GETSTATE(m) (&_state) +static struct module_state _state; +#endif extern PyTypeObject RabinType; @@ -84,7 +93,8 @@ static PyObject* set_average_block_size(PyObject* self, PyObject* args) } if (prev < rabin_polynomial_min_block_size || prev > rabin_polynomial_max_block_size) { - PyErr_SetString(RabinError, + struct module_state *st = GETSTATE(self); + PyErr_SetString(st->RabinError, "average block size should between min and max block size"); return NULL; } @@ -119,7 +129,8 @@ static PyObject* get_file_fingerprints(PyObject* self, PyObject* args) FILE* fp = fopen(filename, "rb"); if (!fp) { - return PyErr_SetFromErrnoWithFilename(RabinError, filename); + struct module_state *st = GETSTATE(self); + return PyErr_SetFromErrnoWithFilename(st->RabinError, filename); } struct rabin_polynomial* head = get_file_rabin_polys(fp); @@ -136,6 +147,7 @@ static PyObject* get_file_fingerprints(PyObject* self, PyObject* args) static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args) { + struct module_state *st = GETSTATE(self); const char *filename; if (!PyArg_ParseTuple(args, "s", &filename)) { return NULL; @@ -143,12 +155,12 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args) FILE* fp = fopen(filename, "rb"); if (!fp) { - return PyErr_SetFromErrnoWithFilename(RabinError, filename); + return PyErr_SetFromErrnoWithFilename(st->RabinError, filename); } struct rabin_polynomial* head = get_file_rabin_polys(fp); if (head == NULL) { - PyErr_SetString(RabinError, "get_file_rabin_polys()"); + PyErr_SetString(st->RabinError, "get_file_rabin_polys()"); return NULL; } @@ -171,16 +183,16 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args) while (curr) { if ((offset = fseek(fp, curr->start, SEEK_SET) == -1)) { fclose(fp); - return PyErr_SetFromErrnoWithFilename(RabinError, filename); + return PyErr_SetFromErrnoWithFilename(st->RabinError, filename); } /* Save chunk to temporarily file */ - snprintf(outfile, BUFSIZ, ".%x.tmp", curr->polynomial); + snprintf(outfile, BUFSIZ, ".%llx.tmp", (unsigned long long) curr->polynomial); SHA1_Init(&ctx); FILE* ofp = fopen(outfile, "wb"); if (!ofp) { fclose(fp); - return PyErr_SetFromErrnoWithFilename(RabinError, filename); + return PyErr_SetFromErrnoWithFilename(st->RabinError, filename); } size_t remain_read = curr->length; @@ -198,7 +210,7 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args) if (bytes_written == 0 && ferror(ofp)) { fclose(fp); fclose(ofp); - return PyErr_SetFromErrnoWithFilename(RabinError, outfile); + return PyErr_SetFromErrnoWithFilename(st->RabinError, outfile); } total_written += bytes_written; } while (total_written != bytes_read); @@ -207,7 +219,7 @@ static PyObject* split_file_by_fingerprints(PyObject* self, PyObject* args) fclose(ofp); /* Rename chunk using SHA-1 sum as filename */ - SHA1_Final(digest, &ctx); + SHA1_Final((unsigned char *) digest, &ctx); to_hex_digest(digest, hex_digest); strncat(hex_digest, ".blk", SHA_DIGEST_LENGTH * 2 + 5); rename(outfile, hex_digest); @@ -247,12 +259,52 @@ static PyMethodDef PyRabinMethods[] = { {NULL, NULL, 0, NULL} }; +#if PY_MAJOR_VERSION >= 3 -PyMODINIT_FUNC initrabin(void) { - PyObject* m = Py_InitModule("rabin", PyRabinMethods); - if (m == NULL) { - return; - } +static int rabin_traverse(PyObject *m, visitproc visit, void *arg) { + Py_VISIT(GETSTATE(m)->RabinError); + return 0; +} + +static int rabin_clear(PyObject *m) { + Py_CLEAR(GETSTATE(m)->RabinError); + return 0; +} + + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "rabin", + NULL, + sizeof(struct module_state), + PyRabinMethods, + NULL, + rabin_traverse, + rabin_clear, + NULL +}; + +#define INITERROR return NULL + +PyMODINIT_FUNC +PyInit_rabin(void) + +#else +#define INITERROR return + +void +initrabin(void) +#endif +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *module = PyModule_Create(&moduledef); +#else + PyObject *module = Py_InitModule("rabin", PyRabinMethods); +#endif + if (module == NULL) + INITERROR; + + struct module_state *st = GETSTATE(module); // Initialize defaults initialize_rabin_polynomial_defaults(); @@ -260,12 +312,20 @@ PyMODINIT_FUNC initrabin(void) { // Initialize rabin.Rabin if (PyType_Ready(&RabinType) < 0) { fprintf(stderr, "Invalid PyTypeObject `RabinType'\n"); - return; + INITERROR; } Py_INCREF(&RabinType); - PyModule_AddObject(m, "Rabin", (PyObject*)&RabinType); + PyModule_AddObject(module, "Rabin", (PyObject*)&RabinType); // Initialize RabinError - RabinError = PyErr_NewException("rabin.error", NULL, NULL); + st->RabinError = PyErr_NewException("rabin.error", NULL, NULL); + if (st->RabinError == NULL) { + Py_DECREF(module); + INITERROR; + } + +#if PY_MAJOR_VERSION >= 3 + return module; +#endif } diff --git a/src/rabin.c b/src/rabin.c index ffc73b4..5efbaae 100644 --- a/src/rabin.c +++ b/src/rabin.c @@ -144,8 +144,12 @@ static PyMethodDef Rabin_methods[] = { }; PyTypeObject RabinType = { +#if PY_MAJOR_VERSION >= 3 + PyVarObject_HEAD_INIT(NULL, 0) +#else PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ +#endif "rabin.Rabin", /*tp_name*/ sizeof(Rabin), /*tp_basicsize*/ 0, /*tp_itemsize*/ diff --git a/test/test_Rabin.py b/test/test_Rabin.py index ea1edd2..a344dc4 100755 --- a/test/test_Rabin.py +++ b/test/test_Rabin.py @@ -7,7 +7,7 @@ TARGET = 'test.bin' os.system("dd if=/dev/urandom of=%s bs=1024 count=100" % TARGET) -random.seed(open(TARGET, 'r').read(1024)) +random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) @@ -21,7 +21,7 @@ def block_reached(start, length, fingerprint): r = Rabin() r.register(block_reached) -with open(TARGET, 'r') as f: +with open(TARGET, 'rb') as f: while True: size = random.randint(1,os.path.getsize(TARGET)) data = f.read(size) @@ -43,12 +43,12 @@ def block_reached(start, length, fingerprint): assert gl == pl == rl assert gp == pp == rp except: - print 'gold ', gold[i] - print 'partial', partial[i] - print 'reached', reached[i] + print('gold ', gold[i]) + print('partial', partial[i]) + print('reached', reached[i]) raise assert partial == gold == reached os.unlink(TARGET) -print 'passed' +print('passed') diff --git a/test/test_delta.py b/test/test_delta.py index f466104..64d1d68 100755 --- a/test/test_delta.py +++ b/test/test_delta.py @@ -7,7 +7,7 @@ TARGET = 'test.bin' os.system("dd if=/dev/urandom of=%s bs=1024k count=10" % TARGET) -random.seed(open(TARGET, 'r').read(1024)) +random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) @@ -16,9 +16,9 @@ r = Rabin() before = get_file_fingerprints(TARGET) -fh = open(TARGET, 'r+') +fh = open(TARGET, 'rb+') fh.seek(1024*1024*5) -fh.write('x') +fh.write(b'x') fh.close() after = get_file_fingerprints(TARGET) @@ -33,11 +33,11 @@ assert bl == al assert bp == ap except: - print 'after', after[i] - print 'before', before[i] + print('after', after[i]) + print('before', before[i]) diffcount += 1 assert diffcount == 1, diffcount os.unlink(TARGET) -print 'passed' +print('passed') diff --git a/test/test_insert.py b/test/test_insert.py index 7899d96..22db496 100755 --- a/test/test_insert.py +++ b/test/test_insert.py @@ -12,7 +12,7 @@ os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % ( TARGET, Mb, filesizeM)) -random.seed(open(TARGET, 'r').read(1024)) +random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) @@ -21,10 +21,10 @@ r = Rabin() before = get_file_fingerprints(TARGET) -f = open(TARGET, 'r+') -data = f.read(filesizeM/2 * Mb) +f = open(TARGET, 'rb+') +data = f.read(int(filesizeM/2 * Mb)) r.update(data) -r.update('x') +r.update(b'x') data = f.read() r.update(data) after = r.fingerprints() @@ -40,11 +40,11 @@ assert bl == al assert bp == ap except: - print 'before', before[i] - print 'after', after[i] + print('before', before[i]) + print('after', after[i]) diffcount += 1 assert diffcount < 10, diffcount os.unlink(TARGET) -print 'passed' +print('passed') diff --git a/test/test_streaming.py b/test/test_streaming.py index 42058f1..2ea3b9d 100755 --- a/test/test_streaming.py +++ b/test/test_streaming.py @@ -13,49 +13,55 @@ else: stream_count = 10 seed = random.random() -print 'seed', seed +print('seed', seed) stream_bs = 1024 * 1024 stream_len = stream_bs * stream_count max_mem = 1024 * 1024 * 20 resource.setrlimit(resource.RLIMIT_AS, (max_mem,-1)) -# print resource.getrlimit(resource.RLIMIT_AS) +# print(resource.getrlimit(resource.RLIMIT_AS)) random.seed(seed) max_blocksize = random.randint(512, max_mem/10) -set_min_block_size(max_blocksize/10) +set_min_block_size(int(max_blocksize/10)) set_max_block_size(max_blocksize) -set_average_block_size(max_blocksize/5) +set_average_block_size(int(max_blocksize/5)) reached = [] def block_reached(start, length, fingerprint): - # print '(%s, %s, %s)' % (start, length, fingerprint) + # print('(%s, %s, %s)' % (start, length, fingerprint)) reached.append((start, length, fingerprint)) r = Rabin() r.register(block_reached) -from guppy import hpy; hp=hpy() +try: + from guppy import hpy; hp=hpy() +except: + pass total = 0 while total < stream_len: size = random.randint(1,max_blocksize*2) size = min(size, stream_len - total) - # print size, + # print(size), data = '' - for i in xrange(size): + for i in range(size): data += chr(random.randrange(0,256)) r.update(data) total += len(data) - print total - print hp.heap() + print(total) + try: + print(hp.heap()) + except: + pass partial = r.fingerprints() -print 'partial done' +print('partial done') assert len(partial) == len(reached) @@ -67,10 +73,10 @@ def block_reached(start, length, fingerprint): assert pl == rl assert pp == rp except: - print 'partial', partial[i] - print 'reached', reached[i] + print('partial', partial[i]) + print('reached', reached[i]) raise assert partial == reached -print 'passed' +print('passed') diff --git a/test/test_swap.py b/test/test_swap.py index 2d58fad..8d3e8cf 100755 --- a/test/test_swap.py +++ b/test/test_swap.py @@ -12,7 +12,7 @@ os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % ( TARGET, Mb, filesizeM)) -random.seed(open(TARGET, 'r').read(1024)) +random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) @@ -21,12 +21,12 @@ r = Rabin() before = get_file_fingerprints(TARGET) -f = open(TARGET, 'r+') -f.seek(filesizeM/2 * Mb) +f = open(TARGET, 'rb+') +f.seek(int(filesizeM/2 * Mb)) data = f.read() r.update(data) f.seek(0) -data = f.read(filesizeM/2 * Mb) +data = f.read(int(filesizeM/2 * Mb)) r.update(data) after = r.fingerprints() @@ -46,10 +46,10 @@ def collect(series, fingerprint): for entry in db.values(): if len(entry) != 2: diffcount += 1 - print entry, len(entry) + print(entry, len(entry)) -print len(db), diffcount +print(len(db), diffcount) assert diffcount < len(before)*.01, diffcount os.unlink(TARGET) -print 'passed' +print('passed') diff --git a/test/test_swap_twofile.py b/test/test_swap_twofile.py index 07001e1..c024da2 100755 --- a/test/test_swap_twofile.py +++ b/test/test_swap_twofile.py @@ -22,17 +22,17 @@ os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % ( beforefn, Mb, filesizeM)) split_file_by_fingerprints(beforefn) -beforefh = open(beforefn, 'r') +beforefh = open(beforefn, 'rb') os.chdir('..') os.chdir('after') -beforefh.seek(filesizeM/2 * Mb) +beforefh.seek(int(filesizeM/2 * Mb)) data = beforefh.read() -afterfh = open(afterfn, 'w') +afterfh = open(afterfn, 'wb') afterfh.write(data) beforefh.seek(0) -data = beforefh.read(filesizeM/2 * Mb) +data = beforefh.read(int(filesizeM/2 * Mb)) afterfh.write(data) afterfh.close() split_file_by_fingerprints(afterfn) @@ -59,13 +59,13 @@ def collect(series, fingerprint): for entry in db.values(): if len(entry) != 2: diffcount += 1 - print entry, len(entry) + print(entry, len(entry)) -print len(db), diffcount +print(len(db), diffcount) assert diffcount < len(before)*.01, diffcount os.system('rm -rf before after') -print 'passed' +print('passed') ''' before.sort() @@ -82,14 +82,14 @@ def collect(series, fingerprint): assert bl == al assert bp == ap except: - print 'before', before[i] - print 'after', after[i] + print('before', before[i]) + print('after', after[i]) diffcount += 1 blocks = len(before) -print blocks, diffcount +print(blocks, diffcount) assert diffcount < blocks*.8, diffcount os.system('rm -rf before after') -print 'passed' +print('passed') ''' From 27d20332cd5e52105573cb35e2951a55a45e1132 Mon Sep 17 00:00:00 2001 From: Marco Pantaleoni Date: Thu, 17 Aug 2017 16:08:37 +0200 Subject: [PATCH 2/2] Enable support for print() for Python 2.x --- test/test_Rabin.py | 4 +++- test/test_delta.py | 2 ++ test/test_insert.py | 2 ++ test/test_streaming.py | 2 ++ test/test_swap.py | 2 ++ test/test_swap_twofile.py | 2 ++ 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test_Rabin.py b/test/test_Rabin.py index a344dc4..b41b7d4 100755 --- a/test/test_Rabin.py +++ b/test/test_Rabin.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import random @@ -15,7 +17,7 @@ reached = [] def block_reached(start, length, fingerprint): - # print '(%s, %s, %s)' % (start, length, fingerprint) + # print('(%s, %s, %s)' % (start, length, fingerprint)) reached.append((start, length, fingerprint)) r = Rabin() diff --git a/test/test_delta.py b/test/test_delta.py index 64d1d68..dafe7f8 100755 --- a/test/test_delta.py +++ b/test/test_delta.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import random diff --git a/test/test_insert.py b/test/test_insert.py index 22db496..d636ea2 100755 --- a/test/test_insert.py +++ b/test/test_insert.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import random diff --git a/test/test_streaming.py b/test/test_streaming.py index 2ea3b9d..e66b298 100755 --- a/test/test_streaming.py +++ b/test/test_streaming.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import random import resource diff --git a/test/test_swap.py b/test/test_swap.py index 8d3e8cf..4c40b78 100755 --- a/test/test_swap.py +++ b/test/test_swap.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import random diff --git a/test/test_swap_twofile.py b/test/test_swap_twofile.py index c024da2..ed9f5ac 100755 --- a/test/test_swap_twofile.py +++ b/test/test_swap_twofile.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import os import sys