-
Notifications
You must be signed in to change notification settings - Fork 0
/
mallet_export.rb
42 lines (35 loc) · 979 Bytes
/
mallet_export.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Exports a collection as a zip archive laid out for the Mallet LDA tool:
# one file per data row under `input_dir/`, plus a `metadata.csv` holding
# the metadata rows.
class Export::MalletExport < Export
  DESCRIPTION = 'Export data for usage in the Mallet LDA Tool'
  # NOTE(review): presumably read by the Export base class to populate
  # @data / @metadata — confirm against the parent class.
  INPUTS = {
    data: :attribute,
    metadata: :attributes
  }.freeze

  # Builds the Mallet archive for the current collection.
  #
  # @return [Array(String, String)] the zip file contents and the file name
  #   'mallet.zip'
  def export!
    # NOTE(review): the meaning of the second argument (true) is defined by
    # @collection.export, which is not visible here.
    data = @collection.export(@data, true)
    metadata = @collection.export(@metadata, true)
    [mallet_zip_from_hash(data, metadata), 'mallet.zip']
  end

  # Packs the data rows and the metadata into an in-memory zip archive.
  #
  # Each data row becomes the entry "input_dir/<first value>" whose content
  # is the row's second value converted to a string. The metadata rows are
  # stored as "metadata.csv".
  #
  # NOTE(review): entry names are taken verbatim from the row values;
  # duplicate names or names containing path separators are not handled here.
  #
  # @param data [Enumerable<Hash>] rows; the first value names the entry and
  #   the second value is its content
  # @param metadata [Enumerable<Hash>] rows written to metadata.csv
  # @return [String] the binary contents of the zip archive
  def mallet_zip_from_hash(data, metadata)
    require 'zip' # loaded lazily so the gem is only needed when exporting

    buffer = ::Zip::OutputStream.write_buffer do |zio|
      data.each do |row|
        entry_name, content = row.values
        zio.put_next_entry("input_dir/#{entry_name}")
        zio.write(content.to_s)
      end
      zio.put_next_entry('metadata.csv')
      zio.write(csv_from_hash(metadata))
    end
    buffer.string
  end

  # Renders a list of hashes as CSV text. The header row is taken from the
  # keys of the first row; every following line holds one row's values.
  #
  # @param h [Enumerable<Hash>, nil] rows to render
  # @return [String] the CSV text, or an empty string when there are no rows
  def csv_from_hash(h)
    require 'csv'

    first_row = h&.first
    # Without a first row there is no header to derive; emit an empty file
    # rather than failing on nil.
    return '' if first_row.nil?

    CSV.generate do |csv|
      csv << first_row.keys
      h.each { |row| csv << row.values }
    end
  end
end