-
Notifications
You must be signed in to change notification settings - Fork 1
/
kage-socho.rb
executable file
·428 lines (414 loc) · 17.4 KB
/
kage-socho.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
#!/usr/bin/env ruby
require "#{File.dirname(__FILE__)}/Kage.rb"
############################################################################
# Glyphs that dereference() may expand inline, keyed by the glyph name a
# reference stroke links to. Each value is a Kage::Glyph built from one
# "name<TAB>stroke$stroke$..." record.
preInputGlyph = {
'u5196-03' => Kage::Glyph.new("u5196-03\t2:7:8:18:22:27:43:27:59$1:0:2:23:31:176:31$2:22:7:176:31:170:43:156:63")
}
# Characters whose glyphs the main loop leaves untouched (see ignorePattern):
# U+4E0F 丏 U+4E31 丱 U+4EE5 以 U+53E2 叢 U+5433 吳
# U+5DE5 工 U+5DE9 巩 U+6B62 止 U+6B63 正 U+6B6F 歯
# U+6B72 歲 U+6B73 歳 U+723F 爿 U+7247 片 U+8033 耳
# U+8085 肅 U+8096 肖 U+8207 與 U+898D 覍 U+9F52 齒
# U+23D92 [淵-氵] U+27D2A [瑣-王]
# NOTE(review): the pattern also lists 53d6 and 5f0f plus a few non-"u" names
# (aj1-13760, cdp-89cd, j90-4c6c, u2ff1-cdp-8cfa-u5bf8) that the list above omits.
# A name matches when the listed prefix is followed by end of string, "-" or "@<digits>".
ignorePattern = /^(u(4e(0f|31|e5)|53d6|53e2|5433|5de[59]|5f0f|6b6[23f]|6b7[23]|723f|7247|80(33|85|96)|8207|898d|9f52|23d92|27d2a)|aj1-13760|cdp-89cd|j90-4c6c|u2ff1-cdp-8cfa-u5bf8)($|-|@\d+)/
# Glyph names the main loop treats as non-kanji and therefore skips. Matches
# "u<hex code point>" names (optionally prefixed with "mihail-jp_mingkana-")
# whose code point falls in one of the listed hexadecimal ranges.
nonKanjiPattern = /^(mihail-jp_mingkana-)?u([01a-e][0-9a-f]{3}|(2[0-9a-d]|3[023]|f[0-8b-f])[0-9a-f]{2}|(2e[0-7]|2f[ef]|31[0-8abf]|4d[c-f])[0-9a-f]|([13-f]|10)[0-9a-f]{4})($|-|@\d+)/
############################################################################
# Expand selected references in place.
#
# Every reference stroke of +glyph+ whose target name is a key of
# +preInputGlyph+ is replaced by the target's strokes, with each coordinate
# pair mapped from the 200x200 source box into the box spanned by the
# reference's start and end points (integer arithmetic).
#
# stat['dereference'] is reset and then receives one entry per expansion:
# the range of indices the expanded strokes occupy at the time of expansion
# => a copy of the original reference stroke. References are processed from
# the last stroke to the first, so indices of strokes not yet visited stay valid.
def dereference(stat, preInputGlyph, glyph)
  stat['dereference'] = {}
  expanded = glyph.to_a
  glyph.each_with_index.reverse_each do |stroke, index|
    next unless stroke.ref? && preInputGlyph.include?(stroke.link_to)

    ref_glyph = preInputGlyph[stroke.link_to].dup
    scaled = ref_glyph.to_a
    scaled.each_with_index do |raw_stroke, k|
      fields = raw_stroke.to_a
      # Fields 3/4, 5/6, 7/8 and 9/10 hold x/y pairs; skip pairs the stroke lacks.
      [4, 6, 8, 10].each do |y_pos|
        next if y_pos >= fields.to_a.length

        x_pos = y_pos - 1
        fields[x_pos] = fields[x_pos] * (stroke.endX - stroke.startX) / 200 + stroke.startX
        fields[y_pos] = fields[y_pos] * (stroke.endY - stroke.startY) / 200 + stroke.startY
      end
      scaled[k] = Kage::Stroke.new(fields)
    end
    ref_glyph.replace_with scaled
    occupied = index...(index + ref_glyph.length)
    stat['dereference'][occupied] = stroke.dup
    expanded[index..index] = ref_glyph.to_a
  end
  glyph.replace_with(expanded)
end
# Detection of specific radicals
#
# Scan +glyph+ for the stroke sequence of a walk radical (shinnyou):
# one or two short [2,7,8] dots, a short [1,0,2] horizontal, a [1,22,0]
# stroke starting exactly where the horizontal ended, a [2,0,7] stroke ending
# within 12 units of that stroke's end, and finally a [2,7,0] or [6,7,0]
# stroke starting within 12 units of the same point.
#
# Writes stat['walkRadical'] with:
#   'index'  - index of the first dot of the (last) match, or nil
#   'type'   - number of dots (1 or 2) of that match, or nil
#   'stat'   - progress counter of the matcher (0 when idle)
#   'tmpPos' - anchor point remembered between steps
# A stroke that does not continue the sequence resets the matcher.
def find_radical_walk(stat, glyph) # walk radical (shinnyou)
  walk = {'index' => nil, 'type' => nil, 'stat' => 0, 'tmpPos' => [nil, nil]}
  stat['walkRadical'] = walk
  restart = lambda do
    walk['stat'] = 0
    walk['tmpPos'] = [nil, nil]
  end

  glyph.each_with_index do |stroke, index|
    progress = walk['stat']
    if stroke[0..2] == [2, 7, 8] && stroke.strokelength < 60 && (0..1).cover?(progress)
      # a dot: progress 0 -> 1 -> 2
      walk['stat'] = progress + 1
    elsif stroke[0..2] == [1, 0, 2] && stroke.strokelength < 60 && (1..2).cover?(progress)
      walk['stat'] = progress + 2
      walk['tmpPos'] = stroke.endPoint
    elsif stroke[0..2] == [1, 22, 0] && (3..4).cover?(progress) && stroke.startPoint == walk['tmpPos']
      walk['stat'] = progress + 2
      walk['tmpPos'] = stroke.endPoint
    elsif stroke[0..2] == [2, 0, 7] && (5..6).cover?(progress) &&
          Kage.dist(stroke.endPoint, walk['tmpPos']) < 12
      walk['stat'] = progress + 2
    elsif (stroke[0..2] == [2, 7, 0] || stroke[0..2] == [6, 7, 0]) && (7..8).cover?(progress) &&
          Kage.dist(stroke.startPoint, walk['tmpPos']) < 12
      # progress is 7 (one dot) or 8 (two dots) here
      walk['index'] = index - progress + 3
      walk['type'] = progress - 6
      restart.call
      STDERR.write("#{glyph.name}: #{walk['type']}点之繞をインデックス#{walk['index']}で検出!\n")
    else
      restart.call
    end
  end
end
# Look for a "special" right sweep: a [6,7,0] stroke whose second control
# point is not above its end point (control2Y >= endY).
#
# Resets stat['specialL2RD'] to {'index' => nil} and stores the index of the
# last such stroke. Nothing is recorded while stat['walkRadical']['index'] is
# set, so find_radical_walk must have filled stat['walkRadical'] beforehand.
def find_special_l2rd(stat, glyph) # special-type right sweep
  result = {'index' => nil}
  stat['specialL2RD'] = result
  glyph.each_with_index do |stroke, index|
    next unless stroke[0..2] == [6, 7, 0]
    next unless stroke.control2Y >= stroke.endY
    next unless stat['walkRadical']['index'].nil?

    result['index'] = index
    STDERR.write("#{glyph.name}: 特殊型右はらいをインデックス#{index}で検出!\n")
  end
end
# Collect candidate strokes for the "dot on a horizontal bar" shape
# (lid radical / u-kanmuri).
#
# Resets stat['pointOnHoriz'] and fills three lists of [index, stroke copy]:
#   'horiz'    - type-1 strokes with equal start and end Y
#   'point'    - strokes starting with [1, 0] that have equal start and end X
#   'diagonal' - [2,7,8] or [2,0,7] strokes running downwards (startY < endY)
# Each stroke lands in at most one list (first matching rule). Glyphs named
# u5c07, u6215 or u8eab (and their suffixed variants) are excluded entirely.
def find_point_on_horiz(stat, glyph) # lid radical / u-kanmuri
  found = {'horiz' => [], 'point' => [], 'diagonal' => []}
  stat['pointOnHoriz'] = found
  return if glyph.name =~ /^(u5c07|u6215|u8eab)\b/ # excluded glyphs

  glyph.each_with_index do |stroke, index|
    bucket =
      if stroke[0] == 1 && stroke.startY == stroke.endY
        'horiz'
      elsif stroke[0..1] == [1, 0] && stroke.startX == stroke.endX
        'point'
      elsif (stroke[0..2] == [2, 7, 8] || stroke[0..2] == [2, 0, 7]) && stroke.startY < stroke.endY
        'diagonal'
      end
    found[bucket].push([index, stroke.dup]) if bucket
  end
end
# Collect candidate strokes for the hook shape (shaped like katakana "re").
#
# Resets stat['hook'] and fills three lists of [index, stroke copy]:
#   'stem'  - type-1 strokes with equal start and end X, or type-2 strokes
#             with end type 7 that run downwards (startY < endY)
#   'horiz' - type-1 strokes with equal start and end Y
#   'hook'  - [2,0,7] strokes rising to the right (startY > endY, startX < endX)
# Each stroke lands in at most one list (first matching rule). Glyphs named
# aj1-13601, u8033 or u8eab (and their suffixed variants) are excluded.
def find_hook(stat, glyph) # hook (katakana "re" shape)
  found = {'hook' => [], 'stem' => [], 'horiz' => []}
  stat['hook'] = found
  return if glyph.name =~ /^(aj1-13601|u8033|u8eab)\b/ # excluded glyphs

  glyph.each_with_index do |stroke, index|
    role =
      if stroke.strokeType == 1 && stroke.startX == stroke.endX
        'stem'
      elsif stroke.strokeType == 2 && stroke.endType == 7 && stroke.startY < stroke.endY
        'stem'
      elsif stroke.strokeType == 1 && stroke.startY == stroke.endY
        'horiz'
      elsif stroke[0..2] == [2, 0, 7] && stroke.startY > stroke.endY && stroke.startX < stroke.endX
        'hook'
      end
    found[role].push([index, stroke.dup]) if role
  end
end
# Collect candidate strokes for a "roofed" right sweep.
#
# Resets stat['l2rd'] and fills two lists of [index, stroke copy]:
#   'l2rd'  - strokes whose start type is 27
#   'horiz' - remaining type-1 strokes with equal start and end Y
def find_roofed_l2rd(stat, glyph) # roofed right sweep
  found = {'l2rd' => [], 'horiz' => []}
  stat['l2rd'] = found
  glyph.each_with_index do |stroke, index|
    if stroke.startType == 27
      found['l2rd'] << [index, stroke.dup]
    elsif stroke.strokeType == 1 && stroke.startY == stroke.endY
      found['horiz'] << [index, stroke.dup]
    end
  end
end
# Collect right sweeps whose tail bends upward: type-2 strokes that run
# downwards (startY < endY) but whose first control point lies below the
# end point (endY < control1Y).
#
# Resets stat['l2rd_up'] to a list of [index, stroke copy] pairs.
def find_l2rd_bended_up(stat, glyph) # right sweep bent upward
  hits = []
  stat['l2rd_up'] = hits
  glyph.each_with_index do |stroke, index|
    next unless stroke.strokeType == 2
    next unless stroke.startY < stroke.endY && stroke.endY < stroke.control1Y

    hits.push([index, stroke.dup])
  end
end
# Replacement of specific radicals with their Song-typeface (socho) shapes
#
# Reshape a walk radical (shinnyou) previously located by find_radical_walk.
# Does nothing unless stat['walkRadical']['index'] is set. The four strokes
# following the dot(s) are edited in place; for the one-dot form two extra
# strokes are inserted as well, which shifts the indices of all later strokes.
#
# Returns true exactly when stat['walkRadical']['type'] is 1, i.e. when the
# one-dot form was rewritten and strokes were inserted; the caller uses this
# to re-run the other detectors.
def replace_radical_walk(stat, glyph) # walk radical (shinnyou)
if not stat['walkRadical']['index'].nil? then
# 'type' is the number of leading dots, so this skips them and addresses
# the short horizontal stroke ([1,0,2] in find_radical_walk).
index = stat['walkRadical']['index'] + stat['walkRadical']['type']
# All horizontal shifts below are fractions of the original width of that stroke.
baseWidth = glyph[index].endX - glyph[index].startX
# Horizontal stroke: shift right by 2/9 of its width and lower its start by 4.
stroke = glyph[index]
stroke.startX += baseWidth * 2 / 9
stroke.startY += 4
stroke.endX += baseWidth * 2 / 9
glyph[index] = stroke
# Next stroke (the [1,22,0] one): turn it into a type-2 curve with end type 7.
stroke = glyph[index + 1]
stroke.strokeType = 2
stroke.endType = 7
stroke.startX += baseWidth * 2 / 9
stroke.control1X = (stroke.startX * 2 + stroke.endX) / 3
# NOTE(review): (endY + endY) / 2 is simply endY; confirm whether
# (startY + endY) / 2 was intended.
stroke.control1Y = (stroke.endY + stroke.endY) / 2
stroke.endX -= baseWidth / 9
stroke.endY -= 1
glyph[index + 1] = stroke
# Third stroke (the [2,0,7] one): halve the height of its start and control
# point above its end, and pull its end left by 2/9 of the base width.
stroke = glyph[index + 2]
stroke.startY = (stroke.startY - stroke.endY) / 2 + stroke.endY
stroke.control1Y = (stroke.control1Y - stroke.endY) / 2 + stroke.endY
stroke.endX -= baseWidth * 2 / 9
glyph[index + 2] = stroke
# Final sweep: a type-6 stroke is first reduced to type 2 (end Y taken from
# the second control point, second control point overwritten by the first).
stroke = glyph[index + 3]
if stroke.strokeType == 6 then
stroke.endY = stroke.control2Y
stroke.controlPoint2 = stroke.controlPoint1
stroke.strokeType = 2
end
stroke.startX -= baseWidth * 2 / 9
# Make sure the control point is not below the end point by swapping the Ys.
if stroke.control1Y > stroke.endY then
stroke.control1Y, stroke.endY = stroke.endY, stroke.control1Y
end
glyph[index + 3] = stroke
if stat['walkRadical']['type'] == 1 then
# One-dot form. glyph[index - 1] is the dot; its end Y is the reference height.
baseY = glyph[index - 1].endY
stroke0 = glyph[index + 0]
stroke1 = glyph[index + 1]
# Raise the horizontal and the top of the following curve by spanY / 2.
spanY = (stroke1.startY - baseY) / 2
stroke0.startY -= spanY / 2
stroke0.endY -= spanY / 2
stroke1.startY -= spanY / 2
# Height of the curve (minus 1), measured before it is shortened below.
downHeight = stroke1.endY - stroke1.startY - 1
# Shrink the curve to 2/5 of its height ...
stroke1.control1Y = (stroke1.control1Y - stroke1.startY) * 2 / 5 + stroke1.startY
stroke1.endY = (stroke1.endY - stroke1.startY) * 2 / 5 + stroke1.startY
# ... and add two more curves below it, given as raw stroke arrays. The
# second one ends at stroke1.startY + downHeight, the curve's former bottom.
stroke2 = [2, 7, 8,
stroke1.endX, stroke1.endY - 1,
stroke1.startX + 1, stroke1.startY + downHeight * 55 / 100,
stroke1.startX, stroke1.startY + downHeight * 7 / 10 + 1]
stroke3 = [2, 32, 7,
stroke1.startX, stroke1.startY + downHeight * 7 / 10 - 1,
stroke1.startX - 1, stroke1.startY + downHeight * 9 / 10,
stroke1.endX, stroke1.startY + downHeight]
glyph[index] = stroke0
glyph[index + 1] = stroke1
glyph.insert(index + 2, stroke2, stroke3)
end
end
return stat['walkRadical']['type'] == 1
end
# Flatten the "special" right sweep located by find_special_l2rd.
# Does nothing unless stat['specialL2RD']['index'] is set.
#
# The sweep is reduced from type 6 to type 2, then the end points of other
# strokes that fall inside the box between the sweep's control point and end
# are moved onto the sweep, and finally a directly preceding [2,0,7] stroke
# that ends near the sweep's start is shortened.
def replace_special_l2rd(stat, glyph) # special-type right sweep
if not stat['specialL2RD']['index'].nil? then
index = stat['specialL2RD']['index']
stroke = glyph[index]
# Type 6 -> type 2: end Y taken from the second control point, second
# control point overwritten by the first.
stroke.endY = stroke.control2Y
stroke.controlPoint2 = stroke.controlPoint1
stroke.strokeType = 2
glyph[index] = stroke
for xStroke, xIndex in glyph.each_with_index
if xIndex != index then
# Re-read the stroke through glyph[] before editing it.
xStroke = glyph[xIndex]
begin
# Non-reference strokes whose end point lies within the X and Y ranges
# spanned by the sweep's control point and end point ...
if (not xStroke.ref?) and ((stroke.control1X)..(stroke.endX)).cover?(xStroke.endX) and ((stroke.control1Y)..(stroke.endY)).cover?(xStroke.endY) then
# ... get their end Y moved onto the straight line through those two
# points, 3 units higher.
slope = (stroke.endY - stroke.control1Y).to_f / (stroke.endX - stroke.control1X).to_f
yIntercept = stroke.control1Y.to_f - slope * stroke.control1X.to_f
xStroke.endY = (xStroke.endX * slope + yIntercept - 3).round
glyph[xIndex] = xStroke
end
rescue => evar
# Any StandardError raised while examining a stroke is reported and skipped.
# NOTE(review): the message prints `index` (the sweep), not `xIndex`
# (the stroke being examined) — confirm which one is intended.
STDERR.write("#{glyph.name}: インデックス#{index}のKageデータが正しくありません!!\n#{evar.to_s}\n")
end
end
end
if index > 0 then
# A [2,0,7] stroke right before the sweep that ends within 12 units of the
# sweep's start: halve the height of its start and control point above its end.
xStroke = glyph[index - 1]
if xStroke[0..2] == [2, 0, 7] and Kage.dist(xStroke.endPoint, stroke.startPoint) < 12 then
xStroke.startY = (xStroke.startY - xStroke.endY) / 2 + xStroke.endY
xStroke.control1Y = (xStroke.control1Y - xStroke.endY) / 2 + xStroke.endY
glyph[index - 1] = xStroke
end
end
end
end
# Turn the vertical dot that sits on top of a horizontal bar (lid radical /
# u-kanmuri) into a slanted type-2 stroke. Works on the candidate lists
# gathered by find_point_on_horiz and does nothing unless both the 'horiz'
# and the 'point' list are non-empty.
#
# Note that the rewritten stroke is the copy stored in stat (it is not
# duplicated again), so the entry in stat['pointOnHoriz']['point'] changes too.
def replace_point_on_horiz(stat, glyph) # lid radical / u-kanmuri
if not (stat['pointOnHoriz']['horiz'].empty? or stat['pointOnHoriz']['point'].empty?) then
intersectThreshold = 10
# Pass 1: discard short (< 60) point candidates that cross a horizontal,
# i.e. the horizontal's X span covers the point and its Y lies at least
# intersectThreshold inside the point's Y span.
for pointCandidate, index in stat['pointOnHoriz']['point'].each_with_index
for horizCandidate in stat['pointOnHoriz']['horiz']
if ((horizCandidate[1].startX)..(horizCandidate[1].endX)).cover?(pointCandidate[1].endX) and
((pointCandidate[1].startY + intersectThreshold)..(pointCandidate[1].endY - intersectThreshold)).cover?(horizCandidate[1].endY) and
pointCandidate[1].strokelength < 60 then
stat['pointOnHoriz']['point'][index] = nil
break
end
end
end
stat['pointOnHoriz']['point'].compact!
# Pass 2: for every horizontal, find the points that end on it.
for horizCandidate in stat['pointOnHoriz']['horiz']
hits = []
for pointCandidate in stat['pointOnHoriz']['point']
# The point ends within the horizontal's X span and within
# intersectThreshold of its Y.
if ((horizCandidate[1].startX)..(horizCandidate[1].endX)).cover?(pointCandidate[1].endX) and
((pointCandidate[1].endY - intersectThreshold)..(pointCandidate[1].endY + intersectThreshold)).cover?(horizCandidate[1].endY) then
hits.push(pointCandidate)
end
end
if hits.length == 1 then
# A diagonal that ends over the same horizontal and either ends in the
# lower half of the point's Y span (plus threshold) or starts in its upper
# half is added to hits, which makes the length check below fail.
for diagonalCandidate in stat['pointOnHoriz']['diagonal']
if ((horizCandidate[1].startX)..(horizCandidate[1].endX)).cover?(diagonalCandidate[1].endX) and
((((hits[0][1].endY + hits[0][1].startY) / 2)..(hits[0][1].endY + intersectThreshold)).cover?(diagonalCandidate[1].endY) or
(((hits[0][1].startY)..(hits[0][1].endY + hits[0][1].startY) / 2)).cover?(diagonalCandidate[1].startY)) then
hits.push(diagonalCandidate)
end
end
end
# Exactly one short point and no competing diagonal: rewrite it.
if hits.length == 1 and hits[0][1].strokelength < 60 then
index = hits[0][0]
# Horizontal offsets scale with the bar's width, capped at 100.
baseLength = [horizCandidate[1].endX - horizCandidate[1].startX, 100].min
STDERR.write("#{glyph.name}: 鍋蓋・ウ冠の点をインデックス#{index}で検出!\n")
stroke = hits[0][1]
stroke.strokeType = 2
stroke.startType = 7
stroke.endType = 8
# Start moves left by a quarter of baseLength and up by a fifth of the
# stroke height (clamped at 0); each later "fifth" uses the already
# updated startY.
stroke.startX -= (baseLength.to_f / 4).ceil
stroke.startY -= ((stroke.endY - stroke.startY).to_f / 5).round
stroke.startY = 0 if stroke.startY < 0
stroke.control1Y -= ((stroke.endY - stroke.startY).to_f / 5).round
stroke.endX += (baseLength.to_f / 20).ceil
stroke.endY -= ((stroke.endY - stroke.startY).to_f / 5).round
glyph[index] = stroke
end
end
end
end
# Rework a hook (shaped like katakana "re"): where a stem meets exactly one
# rising [2,0,7] stroke near that stroke's left end, align the stroke's
# control point and the stem's end with the straight line through the hook.
# Works on the candidate lists gathered by find_hook and does nothing unless
# both the 'hook' and the 'stem' list are non-empty.
def replace_hook(stat, glyph) # hook (katakana "re" shape)
if not (stat['hook']['hook'].empty? or stat['hook']['stem'].empty?) then
intersectThreshold = 8
# Pass 1: keep only hook candidates that are met by exactly one stem.
for hookCandidate, index in stat['hook']['hook'].each_with_index
intersectCount = 0
# Straight line through the hook's start and end points.
slope = (hookCandidate[1].endY - hookCandidate[1].startY).to_f / (hookCandidate[1].endX - hookCandidate[1].startX).to_f
yIntercept = hookCandidate[1].endY - slope * hookCandidate[1].endX
for stemCandidate in stat['hook']['stem']
# A stem "meets" the hook when its end X is inside the hook's X span and
# its end Y is within intersectThreshold of the line at that X.
intersectY = stemCandidate[1].endX * slope + yIntercept
if ((hookCandidate[1].startX)..(hookCandidate[1].endX)).cover?(stemCandidate[1].endX) and
((intersectY - intersectThreshold)..(intersectY + intersectThreshold)).cover?(stemCandidate[1].endY) then
intersectCount += 1
end
end
if intersectCount != 1 then
stat['hook']['hook'][index] = nil
end
end
stat['hook']['hook'].compact!
# Pass 2: for every stem, collect the remaining hooks it meets (same test).
for stemCandidate in stat['hook']['stem']
hits = []
for hookCandidate in stat['hook']['hook']
slope = (hookCandidate[1].endY - hookCandidate[1].startY).to_f / (hookCandidate[1].endX - hookCandidate[1].startX).to_f
yIntercept = hookCandidate[1].endY - slope * hookCandidate[1].endX
intersectY = stemCandidate[1].endX * slope + yIntercept
if ((hookCandidate[1].startX)..(hookCandidate[1].endX)).cover?(stemCandidate[1].endX) and
((intersectY - intersectThreshold)..(intersectY + intersectThreshold)).cover?(stemCandidate[1].endY) then
hits.push(hookCandidate)
end
end
# For a straight (type-1) stem, a horizontal crossing it well inside both
# strokes is added to hits, which makes the length check below fail.
if hits.length == 1 and stemCandidate[1].strokeType == 1 then
for horizCandidate in stat['hook']['horiz']
if ((stemCandidate[1].startY + intersectThreshold)..(stemCandidate[1].endY - intersectThreshold)).cover?(horizCandidate[1].endY) and
((horizCandidate[1].startX + intersectThreshold)..(horizCandidate[1].endX - intersectThreshold)).cover?(stemCandidate[1].endX) then
hits.push(horizCandidate)
end
end
end
# Proceed only if the stem sits in the left part of the hook's X extent:
# at most 0.5 of it for straight stems, 0.333 for curved ones.
if hits.length == 1 and
(stemCandidate[1].endX - hits[0][1].startX).to_f / (hits[0][1].endX - hits[0][1].startX).to_f <= (stemCandidate[1].strokeType == 1 ? 0.5 : 0.333) then
# Work on copies; the entries kept in stat stay as detected.
stroke = hits[0][1].dup
stem = stemCandidate[1].dup
index = hits[0][0]
sIndex = stemCandidate[0]
STDERR.write("#{glyph.name}: 鈎(レの字)をインデックス#{index}で検出!\n")
# Straight stem: the hook starts 8 units left of the stem.
if stem.strokeType == 1 then
stroke.startX = (stem.endX - 8).round
end
# Recompute the line with the (possibly moved) start, put the hook's
# control point 1 unit below it and end the stem on it.
slope = (stroke.endY - stroke.startY).to_f / (stroke.endX - stroke.startX).to_f
yIntercept = stroke.endY - slope * stroke.endX
stroke.control1Y = (stroke.control1X * slope + yIntercept).round + 1
stem.endY = (stem.endX * slope + yIntercept).round
if stem.strokeType == 1 then
stem.endType = 32
elsif stem.strokeType == 2 and stem.endX - stroke.startX > 8 then
# Curved stem: end no further than 8 units right of the hook's start.
stem.endX = stroke.startX + 8
end
glyph[index] = stroke
glyph[sIndex] = stem
end
end
end
end
# Shorten the "roof" of a roofed right sweep: a horizontal stroke whose end
# point is exactly the start point of a sweep candidate. Works on the lists
# gathered by find_roofed_l2rd and does nothing if either list is empty.
#
# Every roof/sweep pair is reported on STDERR; a roof longer than 25 units is
# replaced in +glyph+ by a copy trimmed to its last 25 units.
def replace_roofed_l2rd(stat, glyph) # roofed right sweep
  sweeps = stat['l2rd']['l2rd']
  roofs = stat['l2rd']['horiz']
  return if sweeps.empty? || roofs.empty?

  roofs.each do |roof_entry|
    sweeps.each do |sweep_entry|
      next unless roof_entry[1].endPoint == sweep_entry[1].startPoint

      roof = roof_entry[1].dup
      index = roof_entry[0]
      roof_length = roof.endX - roof.startX
      STDERR.write("#{glyph.name}: 屋根付き右はらいをインデックス#{index}で検出!\n")
      next unless roof_length > 25

      roof.startX = roof.endX - 25
      glyph[index] = roof
    end
  end
end
# Straighten right sweeps whose tail bends upward.
#
# find_l2rd_bended_up records type-2 strokes whose (single) control point
# lies below their end point (endY < control1Y). For each of them a copy with
# the Y values of the control point and the end point exchanged is written
# back to +glyph+, so the end point becomes the lowest point. The copies kept
# in stat['l2rd_up'] are left untouched.
#
# The exchange uses control1Y, the same coordinate the detector tests and the
# same one replace_radical_walk swaps for this condition; the previous code
# swapped control2Y, which a type-2 stroke does not use as its control point.
#
# stat  - detection results; stat['l2rd_up'] is a list of [index, stroke]
# glyph - glyph to modify in place
def replace_l2rd_bended_up(stat, glyph) # right sweep bent upward
  return if stat['l2rd_up'].empty?

  stat['l2rd_up'].each do |index, detected|
    stroke = detected.dup
    STDERR.write("#{glyph.name}: 上に反った右はらいをインデックス#{index}で検出!\n")
    stroke.control1Y, stroke.endY = stroke.endY, stroke.control1Y
    glyph[index] = stroke
  end
end
# Undo dereference(): put the original reference strokes back.
#
# stat['dereference'] maps the index range an expanded reference occupies to
# a copy of the original reference stroke. dereference() records the ranges
# from the highest index down, each one relative to the glyph *before* any
# lower reference was expanded. They therefore have to be collapsed in the
# opposite order (lowest index first): once a lower expansion is collapsed
# again, the next recorded range is valid. Replaying them in recording order,
# as before, replaced the wrong strokes whenever a glyph contained more than
# one expanded reference.
#
# Does nothing when stat has no 'dereference' entry or the entry is empty.
#
# NOTE(review): the ranges are not adjusted for strokes inserted between
# dereference and this call (replace_radical_walk can insert two strokes);
# confirm that expanded references never follow such an insertion point.
def undo_dereference(stat, glyph)
  recorded = stat['dereference']
  return if recorded.nil? || recorded.empty?

  restored = glyph.to_a
  recorded.to_a.reverse_each do |occupied, ref_stroke|
    restored[occupied] = ref_stroke
  end
  glyph.replace_with(restored)
end
############################################################################
# Main loop: each input line (files given on the command line, or standard
# input) is one glyph record; the possibly modified record is printed to
# standard output, diagnostics go to standard error.
while (line = ARGF.gets)
  line.chomp!
  begin
    glyph = Kage::Glyph.new(line)
  rescue NoMethodError
    # Unparsable record: warn in highlighted text and pass the line through.
    fields = line.split(/\t/)
    STDERR.write("\e[33m\e[1m\e[41m#{fields[0]}: 異常データ!!→#{fields[1]}\n\e[0m")
    print "#{line}\n"
    next
  end
  stat = {}
  # Left as is: glyphs on the ignore list, non-kanji glyphs, and glyphs
  # that consist of references only.
  untouched = (glyph.name =~ ignorePattern) || (glyph.name =~ nonKanjiPattern) || glyph.ref_only?
  unless untouched
    # Expand selected references
    dereference(stat, preInputGlyph, glyph)
    # Detect the shapes of interest
    find_radical_walk(stat, glyph)
    find_special_l2rd(stat, glyph)
    find_point_on_horiz(stat, glyph)
    find_hook(stat, glyph)
    find_roofed_l2rd(stat, glyph)
    find_l2rd_bended_up(stat, glyph)
    # Replace them with their Song-typeface shapes. Rewriting a one-dot walk
    # radical inserts strokes, so the remaining detections are repeated.
    walk_inserted_strokes = replace_radical_walk(stat, glyph)
    if walk_inserted_strokes
      STDERR.write("#{glyph.name}: 再計算を行います\n")
      find_special_l2rd(stat, glyph)
      find_point_on_horiz(stat, glyph)
      find_hook(stat, glyph)
      find_roofed_l2rd(stat, glyph)
      find_l2rd_bended_up(stat, glyph)
    end
    replace_special_l2rd(stat, glyph)
    replace_point_on_horiz(stat, glyph)
    replace_hook(stat, glyph)
    replace_roofed_l2rd(stat, glyph)
    replace_l2rd_bended_up(stat, glyph)
  end
  # Restore the expanded references
  undo_dereference(stat, glyph)
  print "#{glyph}\n"
end