Skip to content

Commit

Permalink
Run Ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
laraabastoss committed Jun 22, 2024
1 parent 006709a commit 55a827b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
7 changes: 4 additions & 3 deletions river/sketch/hierarchical_heavy_hitters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations
import typing

import math
import typing

from river import base


class HierarchicalHeavyHitters(base.Base):

"""Full Ancestry Algorithm implementation for the Hierarchical Heavy Hitters problem.[^1]
Expand Down Expand Up @@ -140,7 +141,7 @@ def __init__(self):
self.max_e = 0
self.fe = 0
self.m_fe = 0
self.children: typing.Dict[typing.Hashable, HierarchicalHeavyHitters.Node] = {}
self.children: typing.dict[typing.Hashable, HierarchicalHeavyHitters.Node] = {}

def __init__(self, k: int, epsilon: float, parent_func: typing.Callable[[typing.Hashable, int], typing.Hashable] = None, root_value: typing.Hashable = None):
self.k = k
Expand Down Expand Up @@ -219,7 +220,7 @@ def _compress_node(self, node: HierarchicalHeavyHitters.Node):
node.max_e = max (node.max_e, child_node.ge + child_node.delta_e)
del node.children[child_key]

def output(self, phi: float) -> list[typing.Tuple[typing.Hashable, int]]:
def output(self, phi: float) -> list[typing.tuple[typing.Hashable, int]]:
"""Generate a list of heavy hitters with frequency estimates above the given threshold."""
result: list[tuple[typing.Hashable, int]] = []
if self.root:
Expand Down
11 changes: 6 additions & 5 deletions river/sketch/hyper_log_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from river import base


class HyperLogLog(base.Base):

"""HyperLogLog algorithm for cardinality estimation.[^1][^2]
Expand All @@ -13,10 +14,10 @@ class HyperLogLog(base.Base):
of m bytes of auxiliary memory, known as registers.
Firstly, each element in the data set is hashed into a binary string, ensuring data is
uniformly distributed and simulating random distribution. The algorithm hashes each element
into a binary string and then organizes these binary representations into registers.
uniformly distributed and simulating random distribution. The algorithm hashes each element
into a binary string and then organizes these binary representations into registers.
HyperLogLog represents an improvement over the original LogLog algorithm by utilizing a
HyperLogLog represents an improvement over the original LogLog algorithm by utilizing a
technique called harmonic mean to estimate the cardinality.
Parameters
Expand Down Expand Up @@ -59,14 +60,14 @@ class HyperLogLog(base.Base):
... hyperloglog.update(i)
>>> print(hyperloglog.count())
100
100
>>> hyperloglog = HyperLogLog(b=15)
>>> for i in range(100):
... hyperloglog.update(i%10)
>>> print(hyperloglog.count())
>>> print(hyperloglog.count())
10
References
Expand Down
14 changes: 7 additions & 7 deletions river/sketch/space_saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

import typing


from river import base


class SpaceSaving(base.Base):
"""Space-Saving algorithm for finding heavy hitters.[^1]
The Space-Saving algorithm is designed to find the heavy hitters in a data stream using a
hash map with a fixed amount of memory. It keeps track of the k most frequent items at any
hash map with a fixed amount of memory. It keeps track of the k most frequent items at any
given time, as well as their corresponding approximate frequency.
Upon receiving a new item from the data stream, if it corresponds to a monitored element,
Expand All @@ -20,13 +20,13 @@ class SpaceSaving(base.Base):
Parameters
----------
k
The maximum number of heavy hitters to store. The higher the value of k, the higher the
The maximum number of heavy hitters to store. The higher the value of k, the higher the
accuracy of the algorithm.
Attributes
----------
counts : dict
A dictionary to store the counts of items. The keys correspond to the elements and the
A dictionary to store the counts of items. The keys correspond to the elements and the
values to their respective count.
Methods
Expand All @@ -47,10 +47,10 @@ class SpaceSaving(base.Base):
>>> from river import sketch
>>> spacesaving = sketch.SpaceSaving(k=10)
>>> for i in range(100):
... spacesaving.update(i % 10)
>>> print(len(spacesaving))
10
>>> print(spacesaving.total())
Expand All @@ -59,7 +59,7 @@ class SpaceSaving(base.Base):
{0: 10, 1: 10, 2: 10, 3: 10, 4: 10, 5: 10, 6: 10, 7: 10, 8: 10, 9: 10}
>>> print(spacesaving[10])
10
References
----------
Expand Down

0 comments on commit 55a827b

Please sign in to comment.