diff --git a/examples/05-vector/07-slp.py b/examples/05-vector/07-slp.py index 8deae918..0f692777 100644 --- a/examples/05-vector/07-slp.py +++ b/examples/05-vector/07-slp.py @@ -36,10 +36,13 @@ def corpus(path, encoding="utf-8"): which is a .txt file with a sentence on each line, with slash-encoded tokens (e.g., the/DT cat/NN). """ + result = [] for s in open(path, encoding=encoding): s = list(map(lambda w: w.split("/"), s.strip().split(" "))) s = list(map(lambda w: (w[0].replace("&slash;", "/"), w[1]), s)) - yield s + # yield s + result.append(s) + return result # The corpus is included in the Pattern download zip, in pattern/test/corpora: path = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "tagged-en-oanc.txt") diff --git a/pattern/db/__init__.py b/pattern/db/__init__.py index a6fd0afe..0fdf31f0 100644 --- a/pattern/db/__init__.py +++ b/pattern/db/__init__.py @@ -1650,8 +1650,11 @@ def __repr__(self): def associative(query): """ Yields query rows as dictionaries of (field, value)-items. 
""" + result = [] for row in query: - yield query.record(row) + # yield query.record(row) + result.append(query.record(row)) + return result assoc = associative @@ -2351,8 +2354,11 @@ def __len__(self): return len(self._datasheet) def __iter__(self): + result = [] for i in range(len(self)): - yield list.__getitem__(self._datasheet, i) + # yield list.__getitem__(self._datasheet, i) + result.append(list.__getitem__(self._datasheet, i)) + return result def __repr__(self): return repr(self._datasheet) @@ -2436,8 +2442,11 @@ def __len__(self): return len(self._datasheet) > 0 and len(self._datasheet[0]) or 0 def __iter__(self): + result = [] for i in range(len(self)): - yield self.__getitem__(i) + # yield self.__getitem__(i) + result.append(self.__getitem__(i)) + return result def __repr__(self): return repr(list(iter(self))) @@ -2566,8 +2575,11 @@ def __len__(self): return len(self._datasheet) def __iter__(self): # Can be put more simply but optimized for performance: + result = [] for i in range(len(self)): - yield list.__getitem__(self._datasheet, i)[self._j] + # yield list.__getitem__(self._datasheet, i)[self._j] + result.append(list.__getitem__(self._datasheet, i)[self._j]) + return result def __reversed__(self): return reversed(list(iter(self))) diff --git a/pattern/graph/__init__.py b/pattern/graph/__init__.py index 85411b8e..8b121395 100644 --- a/pattern/graph/__init__.py +++ b/pattern/graph/__init__.py @@ -898,11 +898,13 @@ def dijkstra_shortest_path(graph, id1, id2, heuristic=None, directed=False): Raises an IndexError between nodes on unconnected graphs. 
""" # Based on: Connelly Barnes, http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/119466 - def flatten(list): + def flatten(linked_list): # Flattens a linked list of the form [0,[1,[2,[]]]] - while len(list) > 0: - yield list[0] - list = list[1] + result = [] + while len(linked_list) > 0: + # yield list[0] + result.append(linked_list[0]) + linked_list = linked_list[1] G = adjacency(graph, directed=directed, heuristic=heuristic) q = [(0, id1, ())] # Heap of (cost, path_head, path_rest). visited = set() # Visited nodes. diff --git a/pattern/metrics.py b/pattern/metrics.py index d3abca0a..e2a0f4cc 100644 --- a/pattern/metrics.py +++ b/pattern/metrics.py @@ -69,9 +69,12 @@ def cumsum(iterable): """ Returns an iterator over the cumulative sum of values in the given list. """ n = 0 + result = [] for x in iterable: n += x - yield n + # yield n + result.append(n) + return result #### PROFILER ###################################################################################### @@ -465,9 +468,12 @@ def type_token_ratio(string, n=100, punctuation=PUNCTUATION): as opposed to the total number of words (= lexical diversity, vocabulary richness). """ def window(a, n=100): + result = [] if n > 0: for i in range(max(len(a) - n + 1, 1)): - yield a[i:i + n] + # yield a[i:i + n] + result.append(a[i:i + n]) + return result s = string.lower().split() s = [w.strip(punctuation) for w in s] # Covington & McFall moving average TTR algorithm. @@ -519,16 +525,20 @@ def isplit(string, sep="\t\n\x0b\x0c\r "): This is efficient in combination with cooccurrence(), since the string may be very long (e.g., Brown corpus). 
""" + result = [] a = [] for ch in string: if ch not in sep: a.append(ch) continue if a: - yield "".join(a) + # yield "".join(a) + result.append("".join(a)) a = [] if a: - yield "".join(a) + # yield "".join(a) + result.append("".join(a)) + return result def cooccurrence(iterable, window=(-1, -1), term1=lambda x: True, term2=lambda x: True, normalize=lambda x: x, matrix=None, update=None): @@ -665,14 +675,18 @@ def _multiple(v, round=False): if b is None: a, b = 0, a if a == b: - yield float(a) - raise StopIteration + # yield float(a) + return float(a) + # raise StopIteration r = _multiple(b - a) t = _multiple(r / (n - 1), round=True) a = floor(a / t) * t b = ceil(b / t) * t + result = [] for i in range(int((b - a) / t) + 1): - yield a + i * t + # yield a + i * t + result.append(a + i * t) + return result #### STATISTICS #################################################################################### @@ -733,11 +747,14 @@ def simple_moving_average(iterable, k=10): """ Returns an iterator over the simple moving average of the given list of values. 
""" a = iterable if isinstance(iterable, list) else list(iterable) + result = [] for m in range(len(a)): i = m - k j = m + k + 1 w = a[max(0, i):j] - yield float(sum(w)) / (len(w) or 1) + # yield float(sum(w)) / (len(w) or 1) + result.append(float(sum(w)) / (len(w) or 1)) + return result sma = simple_moving_average diff --git a/pattern/server/__init__.py b/pattern/server/__init__.py index d310556b..628b558d 100644 --- a/pattern/server/__init__.py +++ b/pattern/server/__init__.py @@ -1606,28 +1606,36 @@ def _render(self, compiled, *args, **kwargs): k.update(kwargs) k["template"] = template indent = kwargs.pop("indent", False) + result = [] for cmd, v, w in compiled: if indent is False: w = "" if cmd is None: continue elif cmd == "": - yield self._encode(v, w) + # yield self._encode(v, w) + result.append(self._encode(v, w)) elif cmd == "": - yield self._encode(k.get(v, "$" + v), w) + # yield self._encode(k.get(v, "$" + v), w) + result.append(self._encode(k.get(v, "$" + v), w)) elif cmd == "": - yield "".join(self._render(v[1], k)) if eval(v[0]) else "" + # yield "".join(self._render(v[1], k)) if eval(v[0]) else "" + result.append("".join(self._render(v[1], k)) if eval(v[0]) else "") elif cmd == "": - yield "".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)]) + # yield "".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)]) + result.append("".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)])) elif cmd == "": - yield self._encode(eval(v, k), w) + # yield self._encode(eval(v, k), w) + result.append(self._encode(eval(v, k), w)) elif cmd == "": o = StringIO() k["write"] = o.write # Code blocks use write() for output. exec(v, k) - yield self._encode(o.getvalue(), w) + # yield self._encode(o.getvalue(), w) + result.append(self._encode(o.getvalue(), w)) del k["write"] o.close() + return result def render(self, *args, **kwargs): """ Returns the rendered template as a string. 
diff --git a/pattern/text/__init__.py b/pattern/text/__init__.py index aa77af0b..755a4028 100644 --- a/pattern/text/__init__.py +++ b/pattern/text/__init__.py @@ -589,6 +589,7 @@ def _read(path, encoding="utf-8", comment=";;;"): """ Returns an iterator over the lines in the file at the given path, strippping comments and decoding each line to Unicode. """ + result = [] if path: if isinstance(path, str) and os.path.exists(path): # From file path. @@ -605,8 +606,10 @@ def _read(path, encoding="utf-8", comment=";;;"): line = decode_utf8(line, encoding) if not line or (comment and line.startswith(comment)): continue - yield line - raise StopIteration + # yield line + result.append(line) + # raise StopIteration + return result class Lexicon(lazydict): diff --git a/pattern/text/en/wordnet/__init__.py b/pattern/text/en/wordnet/__init__.py index 022285fd..c175b354 100644 --- a/pattern/text/en/wordnet/__init__.py +++ b/pattern/text/en/wordnet/__init__.py @@ -183,6 +183,7 @@ def __init__(self, synset): def __iter__(self): for s in self.synonyms: yield s + # return (s for s in self.synonyms) def __len__(self): return len(self.synonyms) diff --git a/pattern/text/search.py b/pattern/text/search.py index 6efd5199..1c1125e0 100644 --- a/pattern/text/search.py +++ b/pattern/text/search.py @@ -168,11 +168,14 @@ def product(*args, **kwargs): ("t", "a"), ("t", "t")] """ + result = [] p = [[]] for iterable in map(tuple, args) * kwargs.get("repeat", 1): p = [x + [y] for x in p for y in iterable] for p in p: - yield tuple(p) + # yield tuple(p) + result.append(tuple(p)) + return result try: from itertools import product @@ -722,14 +725,18 @@ def __init__(self, sequence=[], *args, **kwargs): # Parse nested lists and tuples from the sequence into groups. # [DT [JJ NN]] => Match.group(1) will yield the JJ NN sequences. 
         def _ungroup(sequence, groups=None):
+            result = []
             for v in sequence:
                 if isinstance(v, (list, tuple)):
                     if groups is not None:
                         groups.append(list(_ungroup(v, groups=None)))
                     for v in _ungroup(v, groups):
-                        yield v
+                        # yield v
+                        result.append(v)
                 else:
-                    yield v
+                    # yield v
+                    result.append(v)
+            return result
         self.groups = []
         self.sequence = list(_ungroup(sequence, groups=self.groups))
         # Assign Constraint.index:
diff --git a/pattern/text/tree.py b/pattern/text/tree.py
index 21e75ec7..951b93e3 100644
--- a/pattern/text/tree.py
+++ b/pattern/text/tree.py
@@ -128,9 +128,12 @@ def __len__(self):
 
     def __iter__(self):
         i = 0
+        result = []
         while i < len(self._a):
-            yield self._f(self._a[i])
+            # yield self._f(self._a[i])
+            result.append(self._f(self._a[i]))
             i += 1
+        return iter(result)
 
 
 ### SENTENCE #######################################################################################
@@ -1037,8 +1040,11 @@ def loop(self, *tags):
             Possible tags: WORD, LEMMA, POS, CHUNK, PNP, RELATION, ROLE, ANCHOR or a custom word tag.
             Any order or combination of tags can be supplied.
         """
+        result = []
         for i in range(len(self.words)):
-            yield tuple([self.get(i, tag=tag) for tag in tags])
+            # yield tuple([self.get(i, tag=tag) for tag in tags])
+            result.append(tuple([self.get(i, tag=tag) for tag in tags]))
+        return result
 
     def indexof(self, value, tag=WORD):
         """ Returns the indices of tokens in the sentence where the given token tag equals the string.
diff --git a/pattern/vector/__init__.py b/pattern/vector/__init__.py index 76a368ab..01243558 100644 --- a/pattern/vector/__init__.py +++ b/pattern/vector/__init__.py @@ -118,10 +118,13 @@ def chunk(iterable, n): n = int(n) i = 0 j = 0 + result = [] for m in range(n): j = i + len(a[m::n]) - yield a[i:j] + # yield a[i:j] + result.append(a[i:j]) i = j + return result def mix(iterables=[], n=10): @@ -129,10 +132,13 @@ def mix(iterables=[], n=10): """ # list(mix([[1, 2, 3, 4], ["a", "b"]], n=2)) => [1, 2, "a", 3, 4, "b"] a = [list(chunk(x, n)) for x in iterables] + result = [] for i in range(int(n)): for x in a: for item in x[i]: - yield item + # yield item + result.append(item) + return result def bin(iterable, key=lambda x: x, value=lambda x: x): @@ -2136,9 +2142,12 @@ def sequence(i=0, f=lambda i: i + 1): # Used to generate unique vector id's in hierarchical(). # We cannot use Vector.id, since the given vectors might be plain dicts. # We cannot use id(vector), since id() is only unique for the lifespan of the object. 
+    # NOTE: sequence() is deliberately an infinite generator (callers pull
+    # fresh unique id's from it with next()); collecting its values into a
+    # list would never terminate, so the yield is kept as-is.
     while True:
         yield i
         i = f(i)
 
 
 def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs):
@@ -2569,14 +2578,20 @@ def chunks(iterable, n=10):
         a = list(iterable)
         i = 0
         j = 0
+        result = []
         for m in range(n):
             j = i + len(a[m::n])
-            yield a[i:j]
+            # yield a[i:j]
+            result.append(a[i:j])
             i = j
+        return result
     k = kwargs.get("k", K)
     d = list(chunks(documents, max(k, 2)))
+    res = []
     for holdout in range(k):
-        yield list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]
+        # yield list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]
+        res.append((list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]))
+    return res
 
 
 _folds = folds
 
@@ -2595,10 +2610,13 @@ def product(*args):
         # Yields the cartesian product of given iterables:
         # list(product([1, 2], [3, 4])) => [(1, 3), (1, 4), (2, 3), (2, 4)]
         p = [[]]
+        result = []
         for iterable in args:
             p = [x + [y] for x in p for y in iterable]
         for p in p:
-            yield tuple(p)
+            # yield tuple(p)
+            result.append(tuple(p))
+        return result
     s = []  # [((A, P, R, F, o), parameters), ...]
     p = []  # [[("c", 0.1), ("c", 10), ...],
             #  [("gamma", 0.1), ("gamma", 0.2), ...], ...]
diff --git a/pattern/web/__init__.py b/pattern/web/__init__.py
index c76cea88..f0e2bac4 100644
--- a/pattern/web/__init__.py
+++ b/pattern/web/__init__.py
@@ -2352,8 +2352,11 @@ def articles(self, **kwargs):
             Optional parameters can include those passed to
             MediaWiki.index(), MediaWiki.search() and URL.download().
         """
+        result = []
         for title in self.index(**kwargs):
-            yield self.search(title, **kwargs)
+            # yield self.search(title, **kwargs)
+            result.append(self.search(title, **kwargs))
+        return result
 
     # Backwards compatibility.
     all = articles
 
@@ -2367,6 +2370,7 @@ def index(self, namespace=0, start=None, count=100, cached=True, **kwargs):
         id = kwargs.pop("_id", "title")
         # Loop endlessly (= until the last request no longer yields an "apcontinue").
# See: http://www.mediawiki.org/wiki/API:Allpages + result = [] while start != -1: url = URL(self._url, method=GET, query={ "action": "query", @@ -2381,10 +2385,12 @@ def index(self, namespace=0, start=None, count=100, cached=True, **kwargs): data = json.loads(data) for x in data.get("query", {}).get("allpages", {}): if x.get(id): - yield x[id] + # yield x[id] + result.append(x[id]) start = data.get("query-continue", {}).get("allpages", {}) start = start.get("apcontinue", start.get("apfrom", -1)) - raise StopIteration + # raise StopIteration + return result # Backwards compatibility. list = index diff --git a/pattern/web/imap/__init__.py b/pattern/web/imap/__init__.py index 0a20fe8e..44b22ea3 100644 --- a/pattern/web/imap/__init__.py +++ b/pattern/web/imap/__init__.py @@ -273,6 +273,7 @@ def __iter__(self): """ for i in reversed(range(len(self))): yield self[i] + # return (self[i] for i in reversed(range(len(self)))) def __len__(self): status, response = self.parent.imap4.select(self.name, readonly=1)