Skip to content

Commit

Permalink
Implements reservoir sampler randomly sampling stream of features, cl…
Browse files Browse the repository at this point in the history
…oses #7
  • Loading branch information
daniel-j-h committed Jun 14, 2018
1 parent 03bc25c commit 5e44fcc
Showing 1 changed file with 72 additions and 0 deletions.
72 changes: 72 additions & 0 deletions robosat/osm/sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import random


class ReservoirSampler:
'''Randomly samples k items from a stream of unknown n items.
'''

def __init__(self, capacity):
'''Creates an new `ReservoirSampler` instance.
Args:
capacity: the number of items to randomly sample from a stream of unknown size.
'''

assert capacity > 0

self.capacity = capacity
self.reservoir = []
self.pushed = 0

def push(self, v):
'''Adds an item to the reservoir.
Args:
v: the item from the stream to add to the reservoir.
'''

size = len(self.reservoir)

if size < self.capacity:
self.reservoir.append(v)
else:
assert size == self.capacity
assert size <= self.pushed

p = self.capacity / self.pushed

if random.random() < p:
i = random.randint(0, size - 1)
self.reservoir[i] = v

self.pushed += 1

def __len__(self):
'''Returns the number of randomly sampled items.
Returns:
The number of randomly sampled items in the reservoir.
'''

return len(self.reservoir)

def __getitem__(self, k):
'''Returns a randomly sampled item in the reservoir.
Args:
k: the index for the kth item from the reservoir to return.
Returns:
The kth item in the reservoir of randomly sampled items.
'''

return self.reservoir[k]

def __repr__(self):
'''Returns the representation for this class.
Returns:
The string representation for this class.
'''

return '<{}: {}>'.format(self.__class__.__name__, list(self))

0 comments on commit 5e44fcc

Please sign in to comment.