Skip to content

Commit

Permalink
Use [String] in lieu of String to handle chained keys. (#92)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShikiSuen authored Dec 8, 2022
1 parent ab88c2d commit 0de88d0
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 132 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ class IMKMyInputController: IMKInputController {
```swift
class ExampleLM: Megrez.LangModel {
...
override func unigramsFor(key: String) -> [Megrez.Unigram] {
override func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
...
}
...
}
```

這個型別需要下述兩個函式能夠針對給定的鍵回饋對應的資料值、或其存無狀態:
- unigramsFor(key: String) -> [Megrez.Unigram]
- hasUnigramsFor(key: String) -> Bool
- unigramsFor(keyArray: [String]) -> [Megrez.Unigram]
- hasUnigramsFor(keyArray: [String]) -> Bool

MegrezTests.swift 檔案內的 SimpleLM 可以作為範例。

Expand All @@ -82,7 +82,7 @@ MegrezTests.swift 檔案內的 SimpleLM 可以作為範例。
- `compositor.insertKey("gao1")` 可以在當前的游標位置插入讀音「gao1」。
- `compositor.dropKey(direction: .front)` 的作用是:朝著往文字輸入方向、砍掉一個與游標相鄰的讀音。反之,`dropKey(direction: .rear)` 則朝著與文字輸入方向相反的方向、砍掉一個與游標相鄰的讀音。
- 在威注音的術語體系當中,「文字輸入方向」為向前(Front)、與此相反的方向為向後(Rear)。
- `compositor.overrideCandidate(.init(key: "讀音", value: "候選字"), at: 游標位置, overrideType: 覆寫模式)` 用來根據輸入法選中的候選字詞、據此更新當前游標位置選中的候選字詞節點當中的候選字詞。
- `compositor.overrideCandidate(.init(keyArray: ["讀音"], value: "候選字"), at: 游標位置, overrideType: 覆寫模式)` 用來根據輸入法選中的候選字詞、據此更新當前游標位置選中的候選字詞節點當中的候選字詞。

輸入完內容之後,可以聲明一個用來接收結果的變數:

Expand Down
24 changes: 14 additions & 10 deletions Sources/Megrez/1_Compositor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ extension Megrez {
/// 該軌格內可以允許的最大幅位長度。
public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } }
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號的預設值,是「-」。
public static let kDefaultSeparator: String = "-"
public static var theSeparator: String = "-"
/// 該組字器的游標位置。
public var cursor: Int = 0 {
didSet {
Expand All @@ -33,7 +33,12 @@ extension Megrez {
/// 該組字器的標記器位置。
public var marker: Int = 0 { didSet { marker = max(0, min(marker, length)) } }
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。
public var separator = kDefaultSeparator
public var separator = theSeparator {
didSet {
Self.theSeparator = separator
}
}

/// 公開:組字器內已經插入的單筆索引鍵的數量。
public var width: Int { keys.count }
/// 公開:最近一次爬軌結果。
Expand Down Expand Up @@ -71,7 +76,7 @@ extension Megrez {
/// - Parameter key: 要插入的索引鍵。
/// - Returns: 該操作是否成功執行。
@discardableResult public mutating func insertKey(_ key: String) -> Bool {
guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(key: key) else { return false }
guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(keyArray: [key]) else { return false }
keys.insert(key, at: cursor)
let gridBackup = spans
resizeGrid(at: cursor, do: .expand)
Expand Down Expand Up @@ -242,7 +247,7 @@ extension Megrez.Compositor {
return true
}

func getJointKeyArray(range: Range<Int>) -> [String] {
func getJoinedKeyArray(range: Range<Int>) -> [String] {
// 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。
guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] }
return keys[range].map { String($0) }
Expand All @@ -262,11 +267,10 @@ extension Megrez.Compositor {
var nodesChanged = 0
for position in range {
for theLength in 1...min(maxSpanLength, range.upperBound - position) {
let jointKeyArray = getJointKeyArray(range: position..<(position + theLength))
let jointKey = jointKeyArray.joined(separator: separator)
if let theNode = getNode(at: position, length: theLength, keyArray: jointKeyArray) {
let joinedKeyArray = getJoinedKeyArray(range: position..<(position + theLength))
if let theNode = getNode(at: position, length: theLength, keyArray: joinedKeyArray) {
if !updateExisting { continue }
let unigrams = langModel.unigramsFor(key: jointKey)
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
// 自動銷毀無效的節點。
if unigrams.isEmpty {
if theNode.keyArray.count == 1 { continue }
Expand All @@ -277,10 +281,10 @@ extension Megrez.Compositor {
nodesChanged += 1
continue
}
let unigrams = langModel.unigramsFor(key: jointKey)
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
guard !unigrams.isEmpty else { continue }
insertNode(
.init(keyArray: jointKeyArray, spanLength: theLength, unigrams: unigrams, keySeparator: separator),
.init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams),
at: position
)
nodesChanged += 1
Expand Down
4 changes: 2 additions & 2 deletions Sources/Megrez/2_Walker.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ extension Megrez.Compositor {
}
}

let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"], keySeparator: separator))
let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"]))

for (i, vertexSpan) in vertexSpans.enumerated() {
for vertex in vertexSpan {
Expand All @@ -47,7 +47,7 @@ extension Megrez.Compositor {
}
}

let root = Vertex(node: .init(keyArray: ["_ROOT_"], keySeparator: separator))
let root = Vertex(node: .init(keyArray: ["_ROOT_"]))
root.distance = 0
root.edges.append(contentsOf: vertexSpans[0])

Expand Down
65 changes: 44 additions & 21 deletions Sources/Megrez/3_KeyValuePaired.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,49 +7,66 @@ import Foundation

extension Megrez.Compositor {
public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
/// 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。
public var key: String
/// 鍵陣列。一般情況下用來放置讀音等可以用來作為索引的內容。
public var keyArray: [String]
/// 資料值。
public var value: String
/// 將當前鍵值列印成一個字串。
public var description: String { "(" + key + "," + value + ")" }
public var description: String { "(" + keyArray.description + "," + value + ")" }
/// 判斷當前鍵值配對是否合規。如果鍵與值有任一為空,則結果為 false。
public var isValid: Bool { !key.isEmpty && !value.isEmpty }
public var isValid: Bool { !keyArray.joined().isEmpty && !value.isEmpty }
/// 將當前鍵值列印成一個字串,但如果該鍵值配對為空的話則僅列印「()」。
public var toNGramKey: String { !isValid ? "()" : "(" + key + "," + value + ")" }
public var toNGramKey: String { !isValid ? "()" : "(" + joinedKey() + "," + value + ")" }

/// Creates a key-value pair from an array of keys.
///
/// An empty `keyArray` is replaced with `["N/A"]`, and an empty `value` is
/// replaced with `"N/A"`, so neither stored property is ever left empty.
/// - Parameters:
///   - keyArray: The key array. Typically holds readings or other content that can serve as an index.
///   - value: The data value. Defaults to `"N/A"`.
public init(keyArray: [String], value: String = "N/A") {
self.keyArray = keyArray.isEmpty ? ["N/A"] : keyArray
self.value = value.isEmpty ? "N/A" : value
}

/// 初期化一組鍵值配對。
/// - Parameters:
/// - key: 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。
/// - value: 資料值。
public init(key: String = "", value: String = "") {
self.key = key
self.value = value
public init(key: String = "N/A", value: String = "N/A") {
keyArray = key.isEmpty ? ["N/A"] : key.components(separatedBy: Megrez.Compositor.theSeparator)
self.value = value.isEmpty ? "N/A" : value
}

public func hash(into hasher: inout Hasher) {
hasher.combine(key)
hasher.combine(keyArray)
hasher.combine(value)
}

/// Joins the elements of `keyArray` into a single string.
/// - Parameter separator: The string inserted between adjacent keys.
///   Defaults to `Megrez.Compositor.theSeparator`.
/// - Returns: The joined key string.
public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String {
keyArray.joined(separator: separator)
}

public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
lhs.key == rhs.key && lhs.value == rhs.value
lhs.keyArray == rhs.keyArray && lhs.value == rhs.value
}

public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value)
(lhs.keyArray.joined().count < rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value < rhs.value)
}

public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value)
(lhs.keyArray.joined().count > rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value > rhs.value)
}

public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count <= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value)
(lhs.keyArray.joined().count <= rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value <= rhs.value)
}

public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value)
(lhs.keyArray.joined().count >= rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value >= rhs.value)
}
}

Expand All @@ -70,7 +87,7 @@ extension Megrez.Compositor {
}
let keyAtCursor = keys[location]
for theNode in anchors.map(\.node) {
if theNode.key.isEmpty { continue }
if theNode.keyArray.joined(separator: separator).isEmpty { continue }
for gram in theNode.unigrams {
switch filter {
case .all:
Expand All @@ -81,7 +98,7 @@ extension Megrez.Compositor {
case .endAt:
if theNode.keyArray.reversed()[0] != keyAtCursor { continue }
}
result.append(.init(key: theNode.key, value: gram.value))
result.append(.init(keyArray: theNode.keyArray, value: gram.value))
}
}
return result
Expand All @@ -100,7 +117,7 @@ extension Megrez.Compositor {
)
-> Bool
{
overrideCandidateAgainst(key: candidate.key, at: location, value: candidate.value, type: overrideType)
overrideCandidateAgainst(keyArray: candidate.keyArray, at: location, value: candidate.value, type: overrideType)
}

/// 使用給定的候選字詞字串,將給定位置的節點的候選字詞改為與之一致的候選字詞。
Expand All @@ -115,7 +132,7 @@ extension Megrez.Compositor {
_ candidate: String,
at location: Int, overrideType: Node.OverrideType = .withHighScore
) -> Bool {
overrideCandidateAgainst(key: nil, at: location, value: candidate, type: overrideType)
overrideCandidateAgainst(keyArray: nil, at: location, value: candidate, type: overrideType)
}

// MARK: Internal implementations.
Expand All @@ -127,14 +144,18 @@ extension Megrez.Compositor {
/// - value: 資料值。
/// - type: 指定覆寫行為。
/// - Returns: 該操作是否成功執行。
internal func overrideCandidateAgainst(key: String?, at location: Int, value: String, type: Node.OverrideType)
internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Node.OverrideType)
-> Bool
{
let location = max(min(location, keys.count), 0) // 防呆
var arrOverlappedNodes: [NodeAnchor] = fetchOverlappingNodes(at: min(keys.count - 1, location))
var overridden: NodeAnchor?
for anchor in arrOverlappedNodes {
if let key = key, anchor.node.key != key { continue }
if let keyArray = keyArray,
anchor.node.keyArray.joined(separator: separator) != keyArray.joined(separator: separator)
{
continue
}
if anchor.node.selectOverrideUnigram(value: value, type: type) {
overridden = anchor
break
Expand All @@ -150,7 +171,9 @@ extension Megrez.Compositor {
arrOverlappedNodes = fetchOverlappingNodes(at: i)
for anchor in arrOverlappedNodes {
if anchor.node == overridden.node { continue }
if !overridden.node.key.contains(anchor.node.key) || !overridden.node.value.contains(anchor.node.value) {
if !overridden.node.keyArray.joined(separator: separator).contains(
anchor.node.keyArray.joined(separator: separator)) || !overridden.node.value.contains(anchor.node.value)
{
anchor.node.reset()
continue
}
Expand Down
27 changes: 17 additions & 10 deletions Sources/Megrez/6_Node.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,19 @@ extension Megrez.Compositor {
/// 數(比如野獸常數),以讓「c」更容易單獨被選中。
public var overridingScore: Double = 114_514

public private(set) var key: String
// public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }

public private(set) var keyArray: [String]
public private(set) var spanLength: Int
public private(set) var unigrams: [Megrez.Unigram]
public private(set) var currentUnigramIndex: Int = 0 {
didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) }
}

public var currentPair: Megrez.Compositor.KeyValuePaired { .init(key: key, value: value) }
public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) }

public func hash(into hasher: inout Hasher) {
hasher.combine(key)
hasher.combine(keyArray)
hasher.combine(spanLength)
hasher.combine(unigrams)
hasher.combine(currentUnigramIndex)
Expand All @@ -68,14 +69,11 @@ extension Megrez.Compositor {
public private(set) var overrideType: Node.OverrideType

public static func == (lhs: Node, rhs: Node) -> Bool {
lhs.key == rhs.key && lhs.spanLength == rhs.spanLength
lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength
&& lhs.unigrams == rhs.unigrams && lhs.overrideType == rhs.overrideType
}

public init(
keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], keySeparator: String = ""
) {
key = keyArray.joined(separator: keySeparator)
public init(keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = []) {
self.keyArray = keyArray
self.spanLength = spanLength
self.unigrams = unigrams
Expand Down Expand Up @@ -112,6 +110,10 @@ extension Megrez.Compositor {
overrideType = .withNoOverrides
}

/// Joins the elements of this node's `keyArray` into a single string.
/// - Parameter separator: The string inserted between adjacent keys.
///   Defaults to `Megrez.Compositor.theSeparator`.
/// - Returns: The joined key string.
public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String {
keyArray.joined(separator: separator)
}

public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool {
guard type != .withNoOverrides else {
return false
Expand All @@ -136,7 +138,7 @@ extension Megrez.Compositor {
let spanIndex: Int // 幅位座標
var spanLength: Int { node.spanLength }
var unigrams: [Megrez.Unigram] { node.unigrams }
var key: String { node.key }
var keyArray: [String] { node.keyArray }
var value: String { node.value }

/// 將該節錨雜湊化。
Expand All @@ -154,7 +156,12 @@ extension Array where Element == Megrez.Compositor.Node {
public var values: [String] { map(\.value) }

/// 從一個節點陣列當中取出目前的索引鍵陣列。
public var keys: [String] { map(\.key) }
/// Returns one joined key string per node in this array.
///
/// Each node's `keyArray` is joined with `separator`; the order of the
/// result matches the order of the nodes.
/// - Parameter separator: The string inserted between adjacent keys of a node.
///   Defaults to `Megrez.Compositor.theSeparator`.
/// - Returns: The joined key strings, one per node.
public func joinedKeys(by separator: String = Megrez.Compositor.theSeparator) -> [String] {
  // No `.lazy` here: `joined(separator:)` builds the String eagerly either way.
  map { $0.keyArray.joined(separator: separator) }
}

/// 從一個節點陣列當中取出目前的索引鍵陣列。
public var keyArrays: [[String]] { map(\.keyArray) }

/// 返回一連串的節點起點。結果為 (Result A, Result B) 辭典陣列
/// Result A 以索引查座標,Result B 以座標查索引。
Expand Down
16 changes: 8 additions & 8 deletions Sources/Megrez/7_LangModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

/// 語言模組協定。
public protocol LangModelProtocol {
/// 給定鍵,讓語言模型找給一組單元圖陣列。
func unigramsFor(key: String) -> [Megrez.Unigram]
/// 給定鍵,確認是否有單元圖記錄在庫。
func hasUnigramsFor(key: String) -> Bool
/// 給定鍵陣列,讓語言模型找給一組單元圖陣列。
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram]
/// 給定鍵陣列,確認是否有單元圖記錄在庫。
func hasUnigramsFor(keyArray: [String]) -> Bool
}

extension Megrez.Compositor {
Expand All @@ -24,15 +24,15 @@ extension Megrez.Compositor {
/// 給定索引鍵,讓語言模型找給一組經過穩定排序的單元圖陣列。
/// - Parameter keyArray: 給定的索引鍵陣列。
/// - Returns: 對應的經過穩定排序的單元圖陣列。
public func unigramsFor(key: String) -> [Megrez.Unigram] {
langModel.unigramsFor(key: key).stableSorted { $0.score > $1.score }
public func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
langModel.unigramsFor(keyArray: keyArray).stableSorted { $0.score > $1.score }
}

/// 根據給定的索引鍵來確認各個資料庫陣列內是否存在對應的資料。
/// - Parameter keyArray: 索引鍵陣列。
/// - Returns: 是否在庫。
public func hasUnigramsFor(key: String) -> Bool {
langModel.hasUnigramsFor(key: key)
public func hasUnigramsFor(keyArray: [String]) -> Bool {
langModel.hasUnigramsFor(keyArray: keyArray)
}
}
}
Expand Down
Loading

0 comments on commit 0de88d0

Please sign in to comment.