Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding edges() and iteredges() Functions for DAWGs #1

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
926d6e8
working edges() for all but utf-8
EliFinkelshteyn Apr 13, 2015
fa6cd76
edges() working with unicode; added working iteredges(); added basic …
EliFinkelshteyn Apr 13, 2015
8e7390a
adding terminal indicators on edges; adding edge values for IntComple…
EliFinkelshteyn Apr 14, 2015
0211c19
adding tests for larger values to intdawg and intcompletiondawg; repl…
EliFinkelshteyn Apr 14, 2015
30bf53b
edges() and iteredges() now work for all applicable dawgs; tests adde…
EliFinkelshteyn Apr 15, 2015
15355be
items() miss returns empty list; adding test for this; moving appropr…
EliFinkelshteyn Apr 15, 2015
dee560c
b_get_value should always get bytes, not decoded unicode; utf8 should…
EliFinkelshteyn Apr 15, 2015
2a93173
adding explicit bytes() cast for b_get_value() for python 2.x compati…
EliFinkelshteyn Apr 15, 2015
c94b4d8
edges and iter_edges always return boolean terminal; adding edges_dat…
EliFinkelshteyn Apr 16, 2015
8cb08f3
forgot to add one test in last commit
EliFinkelshteyn Apr 16, 2015
f3baac8
adding tests for RecordDawg edges_data() and edgesiter_data()
EliFinkelshteyn Apr 16, 2015
77f3802
don't treat payload_separator as a normal edge
EliFinkelshteyn Apr 20, 2015
ae7472a
use ord instead of hacking with bytearray
EliFinkelshteyn Apr 20, 2015
4975f07
adding unicode tests; starting to fix multibyte unicode issues
EliFinkelshteyn Apr 27, 2015
1207380
working for all but multibyte
EliFinkelshteyn Apr 28, 2015
5462916
working with multibyte unicode tests
EliFinkelshteyn Apr 28, 2015
0b81a9f
xrange -> range for py34 compatibility
EliFinkelshteyn Apr 28, 2015
2cbd340
removing literal unicode for py32 compatibility in tests
EliFinkelshteyn Apr 28, 2015
f56e2b9
removing 'u' literal prefixes
EliFinkelshteyn Apr 28, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 201 additions & 9 deletions dawg_python/dawgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,39 @@ def keys(self, prefix=""):

return res

def children(self, prefix=""):
    """Return a list of the children found below ``prefix``.

    ``prefix`` is encoded to UTF-8 and followed from the dictionary root.
    If it cannot be followed, or the edge follower has nothing to start
    on, an empty list is returned.  Otherwise the list holds whatever
    ``EdgeFollower.get_cur_child()`` reports for each edge, in the order
    the follower visits them.
    """
    encoded = prefix.encode('utf8')
    node = self.dct.follow_bytes(encoded, self.dct.ROOT)
    if node is None:
        return []

    follower = wrapper.EdgeFollower(self.dct, self.guide)
    found = []
    # start() positions the follower on the first edge; next() advances it.
    more = follower.start(node, encoded)
    while more:
        found.append(follower.get_cur_child())
        more = follower.next()
    return found

def iterchildren(self, prefix=""):
    """Lazily yield the children found below ``prefix``.

    Generator counterpart of ``children``: ``prefix`` is encoded to UTF-8
    and followed from the dictionary root; nothing is yielded when it
    cannot be followed or the edge follower cannot start.  Each yielded
    item is the result of ``EdgeFollower.get_cur_child()``.
    """
    encoded = prefix.encode('utf8')
    node = self.dct.follow_bytes(encoded, self.dct.ROOT)
    if node is not None:
        follower = wrapper.EdgeFollower(self.dct, self.guide)
        # start() positions the follower on the first edge; next() advances.
        more = follower.start(node, encoded)
        while more:
            yield follower.get_cur_child()
            more = follower.next()

def iterkeys(self, prefix=""):
b_prefix = prefix.encode('utf8')
index = self.dct.follow_bytes(b_prefix, self.dct.ROOT)
Expand Down Expand Up @@ -279,15 +312,14 @@ def iterkeys(self, prefix=""):
yield u_key

def items(self, prefix=""):
index = self.dct.ROOT
if not isinstance(prefix, bytes):
prefix = prefix.encode('utf8')
res = []

index = self.dct.ROOT
if prefix:
index = self.dct.follow_bytes(prefix, index)
if not index:
return res
return []
res = []

completer = wrapper.Completer(self.dct, self.guide)
completer.start(index, prefix)
Expand All @@ -301,10 +333,9 @@ def items(self, prefix=""):
return res

def iteritems(self, prefix=""):
index = self.dct.ROOT
if not isinstance(prefix, bytes):
prefix = prefix.encode('utf8')

index = self.dct.ROOT
if prefix:
index = self.dct.follow_bytes(prefix, index)
if not index:
Expand All @@ -315,9 +346,95 @@ def iteritems(self, prefix=""):

while completer.next():
key, value = completer.key.split(self._payload_separator)
item = (key.decode('utf8'), a2b_base64(bytes(value))) # bytes() cast is a python 2.6 fix
# bytes() cast is a python 2.6 fix
item = (key.decode('utf8'), a2b_base64(bytes(value)))
yield item

def children(self, prefix=""):
    """Return ``(decoded_key, flag)`` tuples for the edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; an empty prefix starts
    at the root itself.

    Each tuple pairs the follower's ``decoded_key`` with a boolean that is
    ``bool(self._follow_key(key))`` for the follower's current key
    (presumably "this key is terminal / has a payload" -- confirm against
    ``_follow_key``).

    Always returns a list: it is empty when the prefix cannot be followed
    or when the edge follower has nothing to start on.
    """
    index = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        index = self.dct.follow_bytes(prefix, index)
        if not index:
            # Prefix miss: return an empty list rather than None so the
            # result type matches every other path (and ``items``).
            return []

    edge_follower = wrapper.EdgeFollower(self.dct, self.guide,
                                         self._payload_separator)
    res = []
    # start() positions the follower on the first edge; next() advances it.
    has_edge = edge_follower.start(index, prefix)
    while has_edge:
        flag = bool(self._follow_key(bytes(edge_follower.key)))
        res.append((edge_follower.decoded_key, flag))
        has_edge = edge_follower.next()
    return res

def iterchildren(self, prefix=""):
    """Lazily yield ``(decoded_key, flag)`` tuples for edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; nothing is yielded when
    that fails or when the edge follower cannot start.  ``flag`` is the
    truthiness of ``self._follow_key(key)`` for the follower's current key.
    """
    node = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        node = self.dct.follow_bytes(prefix, node)
        if not node:
            return

    follower = wrapper.EdgeFollower(self.dct, self.guide,
                                    self._payload_separator)
    # start() positions the follower on the first edge; next() advances it.
    more = follower.start(node, prefix)
    while more:
        flag = bool(self._follow_key(bytes(follower.key)))
        yield (follower.decoded_key, flag)
        more = follower.next()

def children_data(self, prefix=""):
    """Return ``(decoded_key, value)`` tuples for the edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; an empty prefix starts
    at the root itself.

    For every edge the follower visits, ``self.b_get_value(key)`` is
    consulted: one tuple is produced per returned value, or a single
    ``(decoded_key, None)`` tuple when it returns nothing truthy.

    Always returns a list: it is empty when the prefix cannot be followed
    or when the edge follower has nothing to start on.
    """
    index = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        index = self.dct.follow_bytes(prefix, index)
        if not index:
            # Prefix miss: return an empty list rather than None so the
            # result type matches every other path (and ``items``).
            return []

    edge_follower = wrapper.EdgeFollower(self.dct, self.guide,
                                         self._payload_separator)
    res = []
    # start() positions the follower on the first edge; next() advances it.
    has_edge = edge_follower.start(index, prefix)
    while has_edge:
        # [None] keeps value-less edges visible in the output.
        vals = self.b_get_value(bytes(edge_follower.key)) or [None]
        res.extend((edge_follower.decoded_key, val) for val in vals)
        has_edge = edge_follower.next()
    return res

def iterchildren_data(self, prefix=""):
    """Lazily yield ``(decoded_key, value)`` tuples for edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; nothing is yielded when
    that fails or when the edge follower cannot start.  For each edge,
    one tuple is yielded per value from ``self.b_get_value(key)``, or a
    single ``(decoded_key, None)`` when that call returns nothing truthy.
    """
    node = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        node = self.dct.follow_bytes(prefix, node)
        if not node:
            return

    follower = wrapper.EdgeFollower(self.dct, self.guide,
                                    self._payload_separator)
    # start() positions the follower on the first edge; next() advances it.
    more = follower.start(node, prefix)
    while more:
        payloads = self.b_get_value(bytes(follower.key)) or [None]
        for payload in payloads:
            yield (follower.decoded_key, payload)
        more = follower.next()

def _has_value(self, index):
    """Follow the module-level ``PAYLOAD_SEPARATOR`` from ``index``.

    Returns whatever ``self.dct.follow_bytes`` returns for that step; the
    result is used for its truthiness by callers (presumably a node index
    when a payload follows, a falsy value otherwise -- confirm against
    ``follow_bytes``).
    """
    payload_index = self.dct.follow_bytes(PAYLOAD_SEPARATOR, index)
    return payload_index
Expand Down Expand Up @@ -368,7 +485,6 @@ def similar_items(self, key, replaces):
"""
return self._similar_items("", key, self.dct.ROOT, replaces)


def _similar_item_values(self, start_pos, key, index, replace_chars):
res = []
end_pos = len(key)
Expand Down Expand Up @@ -424,15 +540,17 @@ def _value_for_index(self, index):

def items(self, prefix=""):
    """Return ``(key, record)`` pairs for all keys starting with ``prefix``.

    Delegates to the parent class's ``items`` and replaces each raw value
    with ``self._struct.unpack(value)``.  The result is a list.
    """
    # Debug print() calls removed: a library method must not write to stdout.
    res = super(RecordDAWG, self).items(prefix)
    return [(key, self._struct.unpack(val)) for (key, val) in res]

def iteritems(self, prefix=""):
    """Return a generator of ``(key, record)`` pairs for ``prefix``.

    Wraps the parent class's ``iteritems`` and unpacks each raw value
    with ``self._struct.unpack`` as the pairs are consumed.
    """
    raw_pairs = super(RecordDAWG, self).iteritems(prefix)
    return (
        (name, self._struct.unpack(packed))
        for (name, packed) in raw_pairs
    )


# NOTE(review): presumably the sentinel value meaning "key not found" for the
# integer-valued DAWG classes defined below -- confirm against its users.
LOOKUP_ERROR = -1


class IntDAWG(DAWG):
"""
Dict-like class based on DAWG.
Expand Down Expand Up @@ -464,6 +582,80 @@ class IntCompletionDAWG(CompletionDAWG, IntDAWG):
Dict-like class based on DAWG.
It can store integer values for unicode keys and support key completion.
"""
def children(self, prefix=""):
    """Return ``(decoded_key, has_value)`` tuples for edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; an empty prefix starts
    at the root itself.  Each tuple pairs the follower's ``decoded_key``
    with the result of ``EdgeFollower.has_value()`` for that edge.

    Always returns a list: it is empty when the prefix cannot be followed
    or when the edge follower has nothing to start on.
    """
    index = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        index = self.dct.follow_bytes(prefix, index)
        if not index:
            # Prefix miss: return an empty list rather than None so the
            # result type is the same on every path.
            return []

    edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
    res = []
    # start() positions the follower on the first edge; next() advances it.
    has_edge = edge_follower.start(index, prefix)
    while has_edge:
        res.append((edge_follower.decoded_key, edge_follower.has_value()))
        has_edge = edge_follower.next()
    return res

def iterchildren(self, prefix=""):
    """Lazily yield ``(decoded_key, has_value)`` tuples below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; nothing is yielded when
    that fails or when the edge follower cannot start.  The second tuple
    element is the result of ``EdgeFollower.has_value()`` for the edge.
    """
    node = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        node = self.dct.follow_bytes(prefix, node)
        if not node:
            return

    follower = wrapper.EdgeFollower(self.dct, self.guide)
    # start() positions the follower on the first edge; next() advances it.
    more = follower.start(node, prefix)
    while more:
        yield (follower.decoded_key, follower.has_value())
        more = follower.next()

def children_data(self, prefix=""):
    """Return ``(decoded_key, value)`` tuples for the edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; an empty prefix starts
    at the root itself.  Each tuple pairs the follower's ``decoded_key``
    with the result of ``EdgeFollower.value()`` for that edge.

    Always returns a list: it is empty when the prefix cannot be followed
    or when the edge follower has nothing to start on.
    """
    index = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        index = self.dct.follow_bytes(prefix, index)
        if not index:
            # Prefix miss: return an empty list rather than None so the
            # result type is the same on every path.
            return []

    edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
    res = []
    # start() positions the follower on the first edge; next() advances it.
    has_edge = edge_follower.start(index, prefix)
    while has_edge:
        res.append((edge_follower.decoded_key, edge_follower.value()))
        has_edge = edge_follower.next()
    return res

def iterchildren_data(self, prefix=""):
    """Lazily yield ``(decoded_key, value)`` tuples for edges below ``prefix``.

    ``prefix`` may be text (encoded to UTF-8 here) or bytes.  A non-empty
    prefix is followed from the dictionary root; nothing is yielded when
    that fails or when the edge follower cannot start.  The second tuple
    element is the result of ``EdgeFollower.value()`` for the edge.
    """
    node = self.dct.ROOT
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')
    if prefix:
        node = self.dct.follow_bytes(prefix, node)
        if not node:
            return

    follower = wrapper.EdgeFollower(self.dct, self.guide)
    # start() positions the follower on the first edge; next() advances it.
    more = follower.start(node, prefix)
    while more:
        yield (follower.decoded_key, follower.value())
        more = follower.next()

def items(self, prefix=""):
if not isinstance(prefix, bytes):
prefix = prefix.encode('utf8')
Expand Down
Loading