-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Removed staging, as it proved to be impacting performance negatively.…
… Some additional performance optimization, and added a benchmark target to test.
- Loading branch information
Showing
9 changed files
with
613,103 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
// | ||
// Created by Gabor Detari [email protected] | ||
// Copyright 2024 Gabor Detari. All rights reserved. | ||
// | ||
|
||
|
||
import Foundation | ||
import SymSpellSwift | ||
|
||
class Stopwatch { | ||
private var startTime: DispatchTime? | ||
private var stopTime: DispatchTime? | ||
|
||
// Start the stopwatch | ||
func start() { | ||
startTime = DispatchTime.now() | ||
stopTime = nil | ||
} | ||
|
||
// Stop the stopwatch | ||
func stop() { | ||
stopTime = DispatchTime.now() | ||
} | ||
|
||
// Get the elapsed time in milliseconds | ||
var elapsedTime: Double { | ||
if let start = startTime { | ||
let end = stopTime ?? DispatchTime.now() // If stopwatch is still running, calculate till now | ||
let nanoTime = end.uptimeNanoseconds - start.uptimeNanoseconds | ||
let milliSeconds = Double(nanoTime) / 1_000_000_000 | ||
return milliSeconds | ||
} else { | ||
return 0 | ||
} | ||
} | ||
} | ||
|
||
@main | ||
class Benchmark { | ||
private static func fileURL(_ filename: String) -> URL { | ||
let currentFileURL = URL(fileURLWithPath: #file) | ||
let directoryURL = currentFileURL.deletingLastPathComponent() | ||
return directoryURL.appendingPathComponent(filename) | ||
} | ||
|
||
static let path = FileManager.default.currentDirectoryPath | ||
static let query1k = fileURL("noisy_query_en_1000.txt") | ||
|
||
static let dictionaryPath: [URL] = [ | ||
fileURL("frequency_dictionary_en_30_000.txt"), | ||
fileURL("frequency_dictionary_en_82_765.txt"), | ||
fileURL("frequency_dictionary_en_500_000.txt") | ||
] | ||
|
||
static let dictionaryName: [String] = [ | ||
"30k", | ||
"82k", | ||
"500k" | ||
] | ||
|
||
static let dictionarySize: [Int] = [ | ||
29159, | ||
82765, | ||
500_000 | ||
] | ||
|
||
static func buildQuery1K() -> [String] { | ||
var testList: [String] = Array(repeating: "", count: 1000) | ||
var i = 0 | ||
|
||
if let fileReader = try? String(contentsOf: query1k) { | ||
let lines = fileReader.split(separator: "\n") | ||
for line in lines { | ||
let lineParts = line.split(separator: " ") | ||
if lineParts.count >= 2 { | ||
testList[i] = String(lineParts[0]) | ||
i += 1 | ||
} | ||
} | ||
} | ||
return testList | ||
} | ||
|
||
static func warmUp() { | ||
let dict = SymSpell(maxDictionaryEditDistance: 2, prefixLength: 7) | ||
try? dict.loadDictionary(from: dictionaryPath[0], termIndex: 0, countIndex: 1) | ||
_ = dict.lookup("hockie", verbosity: .all, maxEditDistance: 1) | ||
} | ||
|
||
static func benchmarkPrecalculationLookup() { | ||
var resultNumber = 0 | ||
let repetitions = 1000 | ||
var totalLoopCount = 0 | ||
var totalMatches: Int64 = 0 | ||
// var totalOrigMatches: Int64 = 0 | ||
var totalLoadTime = 0.0, totalMem = 0.0, totalLookupTime = 0.0 | ||
// var totalOrigLoadTime = 0.0, totalOrigMem = 0.0, totalOrigLookupTime = 0.0 | ||
var totalRepetitions: Int64 = 0 | ||
|
||
let stopWatch = Stopwatch() | ||
|
||
for maxEditDistance in 1 ... 3 { | ||
for prefixLength in 5 ... 7 { | ||
for i in 0 ..< dictionaryPath.count { | ||
totalLoopCount += 1 | ||
|
||
// Instantiated dictionary | ||
let memSize = ProcessInfo.processInfo.physicalMemory | ||
stopWatch.start() | ||
let dict = SymSpell(maxDictionaryEditDistance: maxEditDistance, prefixLength: prefixLength) | ||
try? dict.loadDictionary(from: dictionaryPath[i], termIndex: 0, countIndex: 1, termCount: dictionarySize[i]) | ||
stopWatch.stop() | ||
let memDelta = ProcessInfo.processInfo.physicalMemory - memSize | ||
totalLoadTime += stopWatch.elapsedTime | ||
totalMem += Double(memDelta) / 1024.0 / 1024.0 | ||
print("Precalculation instance \(String(format: "%.3f", stopWatch.elapsedTime))s \(String(format: "%.1f", Double(memDelta) / 1024.0 / 1024.0))MB \(dict.wordCount) words \(dict.entryCount) entries MaxEditDistance=\(maxEditDistance) prefixLength=\(prefixLength) dict=\(dictionaryName[i])") | ||
|
||
// Benchmark lookup | ||
for verbosity in SymSpell.Verbosity.allCases { | ||
// Instantiated exact | ||
stopWatch.start() | ||
for _ in 0 ..< repetitions { | ||
resultNumber = dict.lookup("different", verbosity: verbosity, maxEditDistance: maxEditDistance).count | ||
} | ||
stopWatch.stop() | ||
totalLookupTime += stopWatch.elapsedTime | ||
totalMatches += Int64(resultNumber) | ||
// print("Lookup instance \(resultNumber) results \(String(format: "%.6f", stopWatch.elapsedTime / Double(repetitions)))ms/op verbosity=\(verbosity) query=exact") | ||
|
||
totalRepetitions += Int64(repetitions) | ||
} | ||
} | ||
} | ||
} | ||
|
||
print("Average Precalculation time instance \(String(format: "%.3f", totalLoadTime / Double(totalLoopCount)))s") | ||
print("Average Lookup time instance \(String(format: "%.3f", totalLookupTime / Double(totalRepetitions)))ms") | ||
print("Total Lookup results instance \(totalMatches)") | ||
} | ||
|
||
static func main() { | ||
warmUp() | ||
benchmarkPrecalculationLookup() | ||
} | ||
} |
Oops, something went wrong.