-
Notifications
You must be signed in to change notification settings - Fork 1
/
sample.py
72 lines (62 loc) · 2.1 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import random
import math
import histogram_maker
def cumulative_distribution(histogram):
    """Compute the cumulative distribution of a histogram.

    Args:
        histogram: Either a mapping of key -> count, or an iterable of
            ``(key, count)`` pairs (the shape ``read_hist`` returns).

    Returns:
        A list of ``(key, running_total)`` tuples in the input's iteration
        order, where ``running_total`` is the sum of every count up to and
        including that key.
    """
    # A list of pairs has no .items(); accept both shapes so the output of
    # read_hist can be passed straight in.
    pairs = histogram.items() if hasattr(histogram, "items") else histogram
    cumulative = []
    running_total = 0  # not named ``sum`` so the builtin stays usable
    for key, value in pairs:
        running_total += value
        cumulative.append((key, running_total))
    return cumulative
def binary_search(cumulative, target):
    """Find the first entry whose running total is at least `target`.

    Args:
        cumulative: List of ``(key, running_total)`` tuples whose running
            totals are in non-decreasing order.
        target: Value to locate among the running totals.

    Returns:
        The smallest index ``i`` with ``cumulative[i][1] >= target``. If
        every running total is below `target` (including when `cumulative`
        is empty), returns ``len(cumulative)``, i.e. the next index past
        the end.
    """
    left = 0
    right = len(cumulative)  # exclusive bound, so an empty list is safe
    while left < right:
        middle = (left + right) // 2
        if cumulative[middle][1] < target:
            left = middle + 1
        else:
            # Keep `middle` as a candidate rather than returning on equality:
            # when totals repeat (zero-count keys) an earlier index may match
            # too, and the leftmost one is the key that contributed the count.
            right = middle
    return left
def sample(cumulative):
    """Draw one key at random from a cumulative distribution.

    `cumulative` is a list of ``(key, running_total)`` tuples. A random
    integer between 1 and the final running total (both inclusive) is drawn
    and looked up with ``binary_search``; the key at the resulting index is
    returned.
    """
    grand_total = cumulative[-1][1]
    pick = random.randint(1, grand_total)
    position = binary_search(cumulative, pick)
    return cumulative[position][0]
def read_hist(text_file):
    """Read a histogram from a whitespace-separated text file.

    Each non-blank line is expected to hold a key followed by an integer
    count; any further fields on the line are ignored.

    Args:
        text_file: Path of the file to read.

    Returns:
        A list of ``(key, count)`` tuples in file order.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field is not an integer.
    """
    histogram = []
    with open(text_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines carry no entry; skipping
                # them avoids an IndexError on fields[0].
                continue
            histogram.append((fields[0], int(fields[1])))
    return histogram
def num_of_words(text, num):
    """Return a list of randomly sampled words from a histogram file.

    Args:
        text: Path of the histogram file, passed to ``read_hist``.
        num: Number of words to draw. Any value below 1 still yields
            exactly one word.

    Returns:
        A list of sampled words.
    """
    # The file and its cumulative distribution are the same for every draw,
    # so build them once instead of re-reading the file per word.
    cumulative = cumulative_distribution(read_hist(text))
    draws = num if num >= 1 else 1
    return [sample(cumulative) for _ in range(draws)]
def print_sample():
    """Print a space-separated sample of random words.

    Reads the command line as ``<histogram_file> <count>``: the path of the
    histogram file followed by how many words to draw. ``num_of_words``
    requires the file path as its first argument, so it has to come from
    the command line alongside the count.

    Raises:
        SystemExit: If the command line does not have exactly two arguments.
        ValueError: If ``<count>`` is not an integer.
    """
    import sys
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        sys.exit("usage: %s HISTOGRAM_FILE NUM_WORDS" % sys.argv[0])
    text_file, count = cli_args
    words = num_of_words(text_file, int(count))
    # Local is not called ``sample`` so the module-level function of that
    # name is not shadowed.
    print(" ".join(words))
if __name__ == '__main__':
    # num_of_words takes the histogram file path as its first argument, so
    # the path is read from the command line; four words are printed as a
    # sentence ending in a full stop.
    import sys
    if len(sys.argv) < 2:
        sys.exit("usage: %s HISTOGRAM_FILE" % sys.argv[0])
    print(" ".join(num_of_words(sys.argv[1], 4)) + ".")