-
Notifications
You must be signed in to change notification settings - Fork 1
/
alt
executable file
·217 lines (180 loc) · 6.47 KB
/
alt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#!/usr/bin/env bash
# vim: set ft=ruby:
# This file executes as a bash script, which turns around and executes Ruby via
# the line below. The -x argument to Ruby makes it discard everything before
# the second "!ruby" shebang. This allows us to work on Linux, where the
# shebang can only have one argument so we can't directly say
# "#!/usr/bin/env ruby --disable-gems". Thanks for that, Linux.
#
# If this seems confusing, don't worry. You can treat it as a normal Ruby file
# starting with the "!ruby" shebang below.
exec /usr/bin/env ruby --disable-gems -x "$0" $*
#!ruby
if RUBY_VERSION < '1.9.3'
abort 'error: Alt requires Ruby 1.9.3 or higher'
end
require 'optparse'
require 'benchmark'
class Alt
VERSION='1.0.0'
def initialize(argv: ARGV)
@argv = argv
@path = nil
@options = { version: false, stdin: false, debug: false }
parse_options(@argv)
parse_args
end
def main
display_version and return 0 if @options[:version]
highest_score = 0
highest_scored_poss_alt_path = nil
# Get list of possible file paths for alternates
possible_alternate_paths = []
if @options[:stdin]
possible_alternate_paths = get_possible_files_from_stdin
else
possible_alternate_paths = get_possible_files_from_glob
end
# Filter list of possible file paths for alternates down so that scoring has
# to happen on as few items as possible because scoring is expensive due to
# the find the longest common substring dependendency. Note: This may be
# able to be improved as I used a naieve implementation of longest common
# substring. But, we should still reduce the work if we can.
filename = get_filename_minus_extension(@path)
filtered_possible_alternate_paths = []
if @path.test_file?
filename = strip_test_words(filename)
filtered_possible_alternate_paths = possible_alternate_paths.select { |p| p.include?(filename) && !p.test_file? }
else
filtered_possible_alternate_paths = possible_alternate_paths.select { |p| p.include?(filename) && p.test_file? }
end
# Score all the filtered possible alternates keeping track of the highest
# scored one as that should be our alternate, or as close as we can get to
# our alternate.
filtered_possible_alternate_paths.each do |poss_alt_path|
score = Judge.score(poss_alt_path, @path)
if score > highest_score
highest_score = score
highest_scored_poss_alt_path = poss_alt_path
end
debug "#{score}: #{@path} - #{poss_alt_path}"
end
debug "path to find alternate for: #{@path}"
debug "# possible alternate paths: #{possible_alternate_paths.count}"
debug "# filtered possible alternate paths: #{filtered_possible_alternate_paths.count}"
debug "highest_score: #{highest_score}"
debug "highest_scored_poss_alt_path: #{highest_scored_poss_alt_path}"
if highest_scored_poss_alt_path
$stdout.write(highest_scored_poss_alt_path)
else
$stdout.write('')
end
return 0
end
private
def debug(description)
if @options[:debug]
puts description
end
end
def parse_options(argv)
OptionParser.new do |opts|
opts.banner = "Usage: alt [options] <path>"
opts.on('-v', '--version', 'Display version') do
@options[:version] = true
end
opts.on('-d', '--debug', 'Debug mode') do
@options[:debug] = true
end
opts.on('--', '--', 'Use stdin as possible files') do
@options[:stdin] = true
end
end.parse!(argv)
end
def parse_args
@path = Path.new(@argv[0]) if @argv.length > 0
end
def display_version
$stdout.write("alt v#{Alt::VERSION}\n")
end
private
def get_possible_files_from_stdin
$stdin.readlines.map { |p| Path.new(p) }
end
def get_possible_files_from_glob
Dir.glob("**/*").reject { |p| File.directory?(p) }.map { |p| Path.new(p) }
end
def get_filename_minus_extension(path)
File.basename(path, ".*") # filename without extension
end
def strip_test_words(filename)
filename.gsub(/(test_)?(\w+?)(_rake_spec|_spec|_test|_steps)?(\.rb|\.exs|\.ex|\.js|\.py)?$/, '\2')
end
end
class Judge
def self.score(query, str)
# TODO: Improve performance. At the moment the find_longest_common_substring
# method is the most costly operation in this app. This leaves us with a few
# levers to move in terms of improving performance. Specifically, we could
# find a more performant algorithm, or find a way to reduce the number of
# times it has to be called, or use threading to do a scatter and gather
# approach.
longest_match = find_longest_common_substring(query, str)
return (longest_match.length.to_f/str.length.to_f) * (longest_match.length.to_f/query.length.to_f)
end
private
def self.find_longest_common_substring(s1, s2)
# Currently this is implemented using a dynamic programming solution similar
# to http://www.geeksforgeeks.org/longest-common-substring/. This is O(N*M)
# where N is the length of one string and M is the length of the other
# string.
#
# Another option would of course be to explore using something like a
# suffix tree to solve this problem, something like, the following.
# http://www.geeksforgeeks.org/suffix-tree-application-5-longest-common-substring-2/
# This is O(M+N) to build a Generalized Suffix Tree and O(M+N) to find the
# the longest common substring via depth first search.
#
# Beyond that we would have to explore not caring about longest substring
# and moving to a similarity ranking algorithm that maybe cares about
# subsequences rather that substrings, etc.
if (s1 == "" || s2 == "")
return ""
end
m = Array.new(s1.length){ [0] * s2.length }
longest_length, longest_end_pos = 0,0
(0 .. s1.length - 1).each do |x|
(0 .. s2.length - 1).each do |y|
if s1[x] == s2[y]
m[x][y] = 1
if (x > 0 && y > 0)
m[x][y] += m[x-1][y-1]
end
if m[x][y] > longest_length
longest_length = m[x][y]
longest_end_pos = x
end
end
end
end
return s1[longest_end_pos - longest_length + 1 .. longest_end_pos]
end
end
class Path < String
def initialize(path)
super(cleanse_path(path))
end
def test_file?
self.start_with?('features/','test/', 'spec/', 'tests/')
end
def directory?
File.directory?(self)
end
private
def cleanse_path(str)
str.strip.gsub(/^\.\//, '')
end
end
if $0 == __FILE__
exit Alt.new.main
end