-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path001_foundations.py
executable file
·139 lines (115 loc) · 4.85 KB
/
001_foundations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import collections
############################################################
# Problem 3a
def computeMaxWordLength(text):
"""
Given a string |text|, return the longest word in |text|. If there are
ties, choose the word that comes latest in the alphabet.
A word is defined by a maximal sequence of characters without whitespaces.
You might find max() and list comprehensions handy here.
"""
alphaWords = sorted(text.split())
sortedWords = collections.OrderedDict()
for word in alphaWords:
sortedWords[word] = len(word)
sortedWords = collections.OrderedDict(sorted(sortedWords.items(), key=lambda t: t[1]))
return sortedWords.keys()[-1]
############################################################
# Problem 3b
def manhattanDistance(loc1, loc2):
"""
Return the Manhattan distance between two locations, where the locations
are pairs of numbers (e.g., (3, 5)).
"""
return sum(abs(x - y) for x, y in zip(loc1, loc2))
############################################################
# Problem 3c
def mutateSentences(sentence):
"""
High-level idea: generate sentences similar to a given sentence.
Given a sentence (sequence of words), return a list of all possible
alternative sentences of the same length, where each pair of adjacent words
also occurs in the original sentence. Notes:
- The order of the sentences you output doesn't matter.
- You must not output duplicates.
- Your generated sentence can use a word in the original sentence more than
once.
"""
words = sentence.split()
bigrams = zip(*[words[i:] for i in range(2)])
mutatedSentences = dict()
for i in range(0, len(bigrams)):
(mutation1, mutation2) = list(bigrams[i]), list(bigrams[i])
(j, k) = 0, 0
while (j < len(bigrams)):
if (mutation1[-1] == bigrams[j][0]):
mutation1.append(bigrams[j][1])
j = 0
if len(mutation1) == len(words): mutatedSentences[" ".join(mutation1)] = len(mutation1)
if (mutation2[0] == bigrams[len(bigrams) - 1 - j][1]):
mutation2.insert(0, bigrams[len(bigrams) - 1 - j][0])
j = 0
if len(mutation2) == len(words): mutatedSentences[" ".join(mutation2)] = len(mutation2)
k += 1
if k > (len(bigrams) * len(bigrams)): break
j += 1
i += 1
return sorted(mutatedSentences.keys())
############################################################
# Problem 3d
def sparseVectorDotProduct(v1, v2):
"""
Given two sparse vectors |v1| and |v2|, each represented as Counters, return
their dot product.
You might find it useful to use sum() and a list comprehension.
This function will be useful later for linear classifiers.
"""
return (sum(a * b for a, b in zip(v1.values(), v2.values())))
############################################################
# Problem 3e
def incrementSparseVector(v1, scale, v2):
"""
Given two sparse vectors |v1| and |v2|, perform v1 += scale * v2.
This function will be useful later for linear classifiers.
"""
for v in v2.keys(): v1[v] = v1[v] + ( scale * v2[v] )
return (v1) #and also return it b/c not sure if you want that...
############################################################
# Problem 3f
def computeMostFrequentWord(text):
"""
Splits the string |text| by whitespace and returns two things as a pair:
the set of words that occur the maximum number of times, and
their count, i.e.
(set of words that occur the most number of times, that maximum number/count)
You might find it useful to use collections.Counter().
"""
topwords = collections.Counter(text.split()).most_common()
(toplist, lasttop) = [], 0
for top in topwords:
if (top[1] >= lasttop): toplist = [top[0]] + toplist
elif (top[1] < lasttop): break
lasttop = top[1]
return (set(toplist), lasttop)
############################################################
# Problem 3g
def computeLongestPalindrome(text):
"""
A palindrome is a string that is equal to its reverse (e.g., 'ana').
Compute the length of the longest palindrome that can be obtained by deleting
letters from |text|.
For example: the longest palindrome in 'animal' is 'ama'.
Your algorithm should run in O(len(text)^2) time.
You should first define a recurrence before you start coding.
"""
cache = {}
def recurse(text, b, e):
# recurrence finds length of longest palindrome dynamically & w/ memoization:
if(b, e) in cache: return cache[(b, e)]
if b == e: ans = 1
elif b > e: ans = 0
elif text[b] == text[e]: ans = 2 + recurse(text, b+1, e-1)
else: ans = max( recurse(text, b+1, e), recurse(text, b, e-1) )
cache[(b, e)] = ans
return ans
return recurse(text, 0, len(text)-1)