Skip to content

Commit

Permalink
added longest_prefix() method and corresponding tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Dobatymo committed Jul 18, 2018
1 parent f8d1961 commit c9db4e2
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 3 deletions.
61 changes: 60 additions & 1 deletion src/dawg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ cdef class DAWG:
# Return the result of ``self.dct.Contains`` for the raw bytes ``key``
# (the key and its byte length are both passed to the dictionary).
# ``except -1`` reserves -1 as the marker for a propagating exception.
cpdef bint b_has_key(self, bytes key) except -1:
return self.dct.Contains(key, len(key))

cpdef bytes tobytes(self):
cpdef bytes tobytes(self) except +:
"""
Return raw DAWG content as bytes.
"""
Expand Down Expand Up @@ -249,6 +249,24 @@ cdef class DAWG:

return res

def longest_prefix(self, unicode key):
    """
    Return the longest key of this DAWG that is a prefix of ``key``.

    The key is matched in its UTF-8 encoded form, so non-ASCII keys
    are handled the same way as ASCII ones.

    Raises ``KeyError`` if no stored key is a prefix of ``key``.
    """
    # The dictionary is traversed one byte at a time, so the unicode
    # key has to be encoded first; iterating the unicode object into a
    # C char would truncate every non-ASCII code point.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef BaseType index = self.dct.root()
    cdef int pos = 1       # number of bytes consumed after the current step
    cdef int lastpos = 0   # byte length of the longest match so far (0 = none)
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break
        if self.dct.has_value(index):
            lastpos = pos
        pos += 1

    if lastpos:
        # Slice the encoded key (byte offsets) and decode the match.
        return b_key[:lastpos].decode('utf8')
    else:
        raise KeyError("No prefix found")

def iterprefixes(self, unicode key):
'''
Return a generator with keys of this DAWG that are prefixes of the ``key``.
Expand Down Expand Up @@ -798,7 +816,28 @@ cdef class BytesDAWG(CompletionDAWG):
"""
return self._similar_item_values(0, key, self.dct.root(), replaces)

def longest_prefix(self, unicode key):
    """
    Return a ``(prefix, values)`` tuple for the longest key of this DAWG
    that is a prefix of ``key``.

    ``values`` is whatever ``self._value_for_index`` yields for the
    state reached after the payload separator of the matched key.
    The key is matched in its UTF-8 encoded form.

    Raises ``KeyError`` if no stored key is a prefix of ``key``.
    """
    # The dictionary is traversed one byte at a time, so the unicode
    # key has to be encoded first; iterating the unicode object into a
    # C char would truncate every non-ASCII code point.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef BaseType index = self.dct.root()
    cdef BaseType tmp
    # Only read when lastpos != 0; initialised so it never holds garbage.
    cdef BaseType lastindex = index
    cdef int pos = 1       # number of bytes consumed after the current step
    cdef int lastpos = 0   # byte length of the longest match so far (0 = none)
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        # Probe on a copy so the main walk can continue from ``index``:
        # a key ends here iff the payload separator can be followed.
        tmp = index
        if self.dct.Follow(self._c_payload_separator, &tmp):
            lastpos = pos
            lastindex = tmp
        pos += 1

    if lastpos:
        # Slice the encoded key (byte offsets) and decode the match.
        return b_key[:lastpos].decode('utf8'), self._value_for_index(lastindex)
    else:
        raise KeyError("No prefix found")

cdef class RecordDAWG(BytesDAWG):
"""
Expand Down Expand Up @@ -900,6 +939,26 @@ cdef class IntDAWG(DAWG):
# Look up the raw bytes ``key`` with ``self.dct.Find`` and return the
# result as a C int.
# NOTE(review): what ``Find`` yields for a missing key is not visible
# here -- confirm before relying on it.
cpdef int b_get_value(self, bytes key):
return self.dct.Find(key)

def longest_prefix(self, unicode key):
    """
    Return a ``(prefix, value)`` tuple for the longest key of this DAWG
    that is a prefix of ``key``; ``value`` is the integer stored for
    that prefix.

    The key is matched in its UTF-8 encoded form.

    Raises ``KeyError`` if no stored key is a prefix of ``key``.
    """
    # The dictionary is traversed one byte at a time, so the unicode
    # key has to be encoded first; iterating the unicode object into a
    # C char would truncate every non-ASCII code point.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef BaseType index = self.dct.root()
    # Only read when lastpos != 0; initialised so it never holds garbage.
    cdef BaseType lastindex = index
    cdef int pos = 1       # number of bytes consumed after the current step
    cdef int lastpos = 0   # byte length of the longest match so far (0 = none)
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        if self.dct.has_value(index):
            lastpos = pos
            lastindex = index
        pos += 1

    if lastpos:
        # Slice the encoded key (byte offsets) and decode the match.
        return b_key[:lastpos].decode('utf8'), self.dct.value(lastindex)
    else:
        raise KeyError("No prefix found")

# FIXME: code duplication.
cdef class IntCompletionDAWG(CompletionDAWG):
Expand Down
14 changes: 13 additions & 1 deletion tests/test_dawg.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ def test_unicode_sorting(self):
# if data is sorted according to unicode rules.
dawg.DAWG([key1, key2])


def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key that starts the query."""
    trie = dawg.DAWG(["a", "as", "asdf"])
    # (query, expected longest stored prefix)
    cases = (
        ("a", "a"),
        ("as", "as"),
        ("asd", "as"),
        ("asdf", "asdf"),
    )
    for query, expected in cases:
        assert trie.longest_prefix(query) == expected

class TestIntDAWG(object):

Expand Down Expand Up @@ -148,6 +153,13 @@ def test_int_value_ranges(self):
with pytest.raises(OverflowError):
self.IntDAWG({'f': 2**32-1})

def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key together with its value."""
    # NOTE(review): this builds dawg.IntDAWG directly rather than
    # self.IntDAWG, so subclasses that rebind IntDAWG re-run it against
    # IntDAWG itself -- confirm that is intended.
    trie = dawg.IntDAWG([("a", 1), ("as", 2), ("asdf", 3)])
    # (query, expected (prefix, value) pair)
    cases = (
        ("a", ("a", 1)),
        ("as", ("as", 2)),
        ("asd", ("as", 2)),
        ("asdf", ("asdf", 3)),
    )
    for query, expected in cases:
        assert trie.longest_prefix(query) == expected


class TestIntCompletionDAWG(TestIntDAWG):
IntDAWG = dawg.IntCompletionDAWG # checks that all tests for IntDAWG pass
Expand Down
7 changes: 6 additions & 1 deletion tests/test_payload_dawg.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ def test_build_error(self):
with pytest.raises(dawg.Error):
self.dawg(payload_separator=b'f')


def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key with all of its payloads."""
    items = [("a", b"a1"), ("a", b"a2"), ("as", b"as"), ("asdf", b"asdf")]
    trie = dawg.BytesDAWG(items)
    # (query, expected (prefix, payload list) pair)
    cases = (
        ("a", ("a", [b"a1", b"a2"])),
        ("as", ("as", [b"as"])),
        ("asd", ("as", [b"as"])),
        ("asdf", ("asdf", [b"asdf"])),
    )
    for query, expected in cases:
        assert trie.longest_prefix(query) == expected

class TestRecordDAWG(object):

Expand Down

0 comments on commit c9db4e2

Please sign in to comment.