-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathserimi.rb
126 lines (115 loc) · 5 KB
/
serimi.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#Serimi Functionalities.
#Author: Samur Araujo
#Date: 10 April 2011.
#License: SERIMI is distributed under the LGPL[http://www.gnu.org/licenses/lgpl.html] license.
require 'logger'
require 'optparse'
require 'optparse/uri'
# Command-line interface definition for SERIMI.
#
# Produces two top-level locals used by the rest of this script:
#   options - Hash of option values, pre-populated with the defaults below
#   opts    - the OptionParser that fills +options+ when it parses arguments
#
# The -s, -t and -c values must start with http:// or https://; anything else
# raises OptionParser::InvalidArgument from inside the option handler.
options = {
  :verbose         => false,
  :logfile         => nil,
  :source          => nil,
  :target          => nil,
  :class           => nil,
  :output          => "./output.txt",
  :append          => "w",
  :format          => "txt",
  :chunk           => 20,
  :topk            => 0,
  :offset          => 0,
  :stringthreshold => 0.7,
  :rdsthreshold    => nil,
  :usepivot        => 'false',
  :blocking        => 'true'
}

# \A (not ^) anchors at the start of the whole string, so a value that merely
# contains a line beginning with "http://" is rejected.
http_uri = %r{\Ahttps?://}

# Shared validator for the three endpoint/class URIs; returns the URI unchanged.
require_http_uri = lambda do |uri|
  raise OptionParser::InvalidArgument, "#{uri}, not a valid URI." unless uri =~ http_uri
  uri
end

opts = OptionParser.new do |parser|
  parser.banner = "Usage: serimi.rb [options] \n\nExample of use: \nruby serimi.rb -s http://www4.wiwiss.fu-berlin.de/sider/sparql -t http://dbpedia.org/sparql?default-graph-uri=http://dbpedia.org -c http://www4.wiwiss.fu-berlin.de/sider/resource/sider/drugs \n"

  parser.on( '-v', '--verbose', 'Output more information' ) do
    options[:verbose] = true
  end

  # Single definition of -l: records the file name AND redirects output.
  # From this point on, the top-level +puts+ is replaced by a version that
  # appends to the log file (held in the global $logger) instead of stdout.
  parser.on( '-l', '--logfile FILE', 'Write log to FILE' ) do |file|
    options[:logfile] = file
    $logger = File.open(file, 'a')
    def puts(str)
      # An Array is written one element per line; anything else as one line.
      lines = str.instance_of?(Array) ? str : [str]
      lines.each do |line|
        $logger.write(line.to_s)
        $logger.write("\n")
      end
    end
  end

  parser.on( '-s URI ', '--source URI (MANDATORY)', String, 'Source Virtuoso sparql endpoint - URI' ) do |uri|
    options[:source] = require_http_uri.call(uri)
  end

  parser.on( '-t URI ', '--target URI (MANDATORY)', String, 'Target Virtuoso sparql endpoint - URI' ) do |uri|
    options[:target] = require_http_uri.call(uri)
  end

  parser.on( '-c URI ', '--class URI (MANDATORY)',String, 'Source class for interlink - URI' ) do |uri|
    options[:class] = require_http_uri.call(uri)
  end

  parser.on( '-o FILE_NAME', '--output FILE', String, 'Write output to FILE - Default=./output.txt' ) do |file|
    options[:output] = file
  end

  parser.on( '-a', '--append-output value', String, 'Append output to FILE - A value: a or w - Default=w' ) do |mode|
    options[:append] = mode
  end

  parser.on( '-f', '--output-format value', String, 'Output format: txt, nt. Default=txt' ) do |format|
    options[:format] = format
  end

  parser.on( '-k', '--chunk value', Integer, 'Number of source instances processed per interaction, a value >= 2 - Default=20' ) do |size|
    options[:chunk] = size
  end

  # Stored under :topk (it previously overwrote :chunk by mistake).
  parser.on( '-p', '--top k results', Integer, 'Return only Top K results, a value >= 1 - Default=0' ) do |k|
    options[:topk] = k
  end

  parser.on( '-b', '--offset value', Integer, 'Start processing from a specific offset - Default=0' ) do |offset|
    options[:offset] = offset
  end

  parser.on( '-x', '--string-threshold value', Float, 'String distance threshold. A value between (0,1) - Default=0.7' ) do |threshold|
    options[:stringthreshold] = threshold
  end

  parser.on( '-y', '--rds-threshold value', Float, 'RDS threshold. A value between (0,1) - Default=max(media,mean)' ) do |threshold|
    options[:rdsthreshold] = threshold
  end

  # Stored under :usepivot (it previously overwrote :rdsthreshold by mistake).
  parser.on( '-u', '--use-pivot value', String, 'Select a pivot to reinvorce the class of interest. A value (false or true) - Default=false' ) do |flag|
    options[:usepivot] = flag
  end

  parser.on( '-m', '--sort-source value', String, 'Sort resources before appling the selection phase. A value (false or true) - Default=true' ) do |flag|
    options[:blocking] = flag
  end

  parser.on( '-h', '--help', 'Display this screen' ) do
    puts parser
    exit
  end
end
# Parse the command line (consuming recognised switches from ARGV) and make
# sure the three mandatory options -s, -c and -t were supplied.
#
# Every OptionParser failure is reported the same way: the error message,
# then the usage text, then a non-zero exit. OptionParser::ParseError is the
# common ancestor of InvalidOption, MissingArgument and InvalidArgument; the
# last one is what the URI checks and the Integer/Float conversions raise, so
# rescuing the parent keeps those from surfacing as a raw backtrace.
begin
  opts.parse!
  mandatory = [:source, :class, :target]
  missing = mandatory.select { |param| options[param].nil? }
  unless missing.empty?
    puts "Missing options: #{missing.join(', ')}"
    puts opts
    exit 1 # signal the failure to calling scripts
  end
rescue OptionParser::ParseError => e
  puts e.to_s # friendly output when parsing fails
  puts opts
  exit 1
end
# Report the chosen modes, then load the implementation and start it with the
# parsed options.
puts "Being verbose" if options[:verbose]
puts "Logging to file #{options[:logfile]}" if options[:logfile]

# Ruby >= 1.9.2 no longer searches the current directory on require, so the
# script's own directory is appended as a fallback. Appending (not prepending)
# leaves any existing load-path resolution untouched.
# NOTE(review): assumes serimi_class.rb lives beside this script - confirm.
script_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH << script_dir unless $LOAD_PATH.include?(script_dir)
require 'serimi_class'

# Serimi is presumably defined by serimi_class (not visible in this file).
Serimi.new(options)