chore(minor): Fix sampling for virtual/resident memory peaks (#200)
As reported here: https://forums.swift.org/t/compiler-optimisations-for-functional-style-collection-algorithms/68291/12, the peak resident memory counter showed invalid results. This PR enables the missing sampling for the two peak counters (virtual/resident memory) so they report the expected results.
hassila authored Nov 14, 2023
1 parent d632894 commit 4038f5e
Showing 5 changed files with 85 additions and 24 deletions.
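
The substance of the fix: the stats producer only started its background sampler when the thread metrics were requested, so the peak resident/virtual memory values came from a single end-of-run snapshot instead of a running maximum (visible in the old macOS lines further down). Below is a minimal, self-contained sketch of the sampling pattern this commit turns on; it is not the package's implementation, and the `read` closure is a hypothetical placeholder for a platform read such as proc_pidinfo (macOS) or /proc/self/statm (Linux).

import Dispatch
import Foundation

// Sketch only: a background task samples at roughly `rateMicroseconds`,
// keeps a running maximum, and signals a semaphore after the first sample
// so the caller never reads zero samples.
final class PeakSampler {
    private let lock = NSLock()
    private var peak = 0
    private var shouldStop = false

    func start(rateMicroseconds: UInt32 = 10_000, read: @escaping () -> Int) {
        let firstSample = DispatchSemaphore(value: 0)
        DispatchQueue.global(qos: .userInitiated).async {
            var signalledFirstSample = false
            while true {
                let sample = read()
                self.lock.lock()
                if sample > self.peak { self.peak = sample }
                let stop = self.shouldStop
                self.lock.unlock()
                if !signalledFirstSample {
                    firstSample.signal() // lets start() return once >= 1 sample exists
                    signalledFirstSample = true
                }
                if stop { return }
                Thread.sleep(forTimeInterval: Double(rateMicroseconds) / 1_000_000)
            }
        }
        firstSample.wait() // replaces the old "sleep a little and hope" approach
    }

    func stopAndReadPeak() -> Int {
        // For the sketch we read the peak as we flag shutdown; the real
        // producer waits for the sampler to acknowledge via its own semaphore.
        lock.lock()
        shouldStop = true
        let value = peak
        lock.unlock()
        return value
    }
}

The executor's equivalents are the producer's startSampling()/stopSampling(), extended in the hunks below.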
14 changes: 7 additions & 7 deletions Sources/Benchmark/BenchmarkExecutor.swift
@@ -100,10 +100,8 @@ final class BenchmarkExecutor { // swiftlint:disable:this type_body_length
var iterations = 0
let initialStartTime = BenchmarkClock.now

// 'Warmup' to remove initial mallocs from stats in p100
if mallocStatsRequested {
_ = MallocStatsProducer.makeMallocStats()
}
// 'Warmup' to remove initial mallocs from stats in p100, also used as base for some metrics
_ = MallocStatsProducer.makeMallocStats() // baselineMallocStats

// Calculate typical sys call check overhead and deduct that to get 'clean' stats for the actual benchmark
var operatingSystemStatsOverhead = OperatingSystemStats()
@@ -200,10 +198,10 @@ final class BenchmarkExecutor { // swiftlint:disable:this type_body_length
delta = stopMallocStats.mallocCountLarge - startMallocStats.mallocCountLarge
statistics[.mallocCountLarge]?.add(Int(delta))

delta = stopMallocStats.allocatedResidentMemory -
startMallocStats.allocatedResidentMemory
delta = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory
statistics[.memoryLeaked]?.add(Int(delta))

// delta = stopMallocStats.allocatedResidentMemory - baselineMallocStats.allocatedResidentMemory // baselineMallocStats!
statistics[.allocatedResidentMemory]?.add(Int(stopMallocStats.allocatedResidentMemory))
}

@@ -269,7 +267,9 @@ final class BenchmarkExecutor { // swiftlint:disable:this type_body_length
}

if benchmark.configuration.metrics.contains(.threads) ||
benchmark.configuration.metrics.contains(.threadsRunning) {
benchmark.configuration.metrics.contains(.threadsRunning) ||
benchmark.configuration.metrics.contains(.peakMemoryResident) ||
benchmark.configuration.metrics.contains(.peakMemoryVirtual) {
operatingSystemStatsProducer.startSampling(5_000) // ~5 ms
}

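
With the condition above widened, requesting either peak memory metric is enough to start the ~5 ms sampler. A hedged usage sketch, assuming the package's usual registration API; the benchmark name and workload below are made up for illustration.

import Benchmark

let benchmarks = {
    // Hypothetical benchmark: asking for the peak memory metrics now also
    // triggers operatingSystemStatsProducer.startSampling(...) under the hood.
    Benchmark("AllocationHeavyWorkload",
              configuration: .init(metrics: [.peakMemoryResident,
                                             .peakMemoryVirtual,
                                             .mallocCountTotal])) { benchmark in
        for _ in benchmark.scaledIterations {
            blackHole(Array(repeating: UInt8(0), count: 1_000_000))
        }
    }
}

Nothing in the benchmark body itself changes; the peaks are picked up by the sampler while the closure runs.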
2 changes: 1 addition & 1 deletion Sources/Benchmark/BenchmarkMetric.swift
@@ -172,7 +172,7 @@ public extension BenchmarkMetric {
case .mallocCountTotal:
return "Malloc (total)"
case .allocatedResidentMemory:
return "Memory (allocated)"
return "Memory (allocated resident)"
case .memoryLeaked:
return "Malloc / free Δ"
case .syscalls:
@@ -33,7 +33,7 @@ import ExtrasJSON
// var largeNMallocMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).large.nmalloc")
// var smallNDallocMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).small.ndalloc")
// var largeNDallocMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).large.ndalloc")
// var smallAlloctedMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).small.allocated")
// var smallAllocatedMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).small.allocated")
// var largeAllocatedMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).large.allocated")
// var smallNFillsMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).small.nfills")
// var largeNFillsMIB = setupMIB(name: "stats.arenas.\(MALLCTL_ARENAS_ALL).large.nfills")
@@ -53,8 +53,6 @@ import ExtrasJSON

// Update jemalloc internal statistics, this is the magic incantation to do it
static func updateEpoch() {
var allocated = 0
var size = MemoryLayout<Int>.size
var epoch = 0
let epochSize = MemoryLayout<Int>.size
var result: Int32 = 0
@@ -66,11 +64,10 @@ import ExtrasJSON
}

// Then update epoch
result = mallctlbymib(epochMIB, epochMIB.count, &allocated, &size, &epoch, epochSize)
result = mallctlbymib(epochMIB, epochMIB.count, nil, nil, &epoch, epochSize)
if result != 0 {
print("mallctlbymib epochMIB returned \(result)")
}
// return epoch
}

// Read the actual stats using a cached MIB as the key
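
The jemalloc hunks above also fix the epoch update: writing the epoch only needs the new-value pointer and length, so the old-value arguments can be nil/nil instead of the unused allocated/size variables. A minimal sketch of that idiom using jemalloc's mallctl; the import below is an assumption, as the package routes these calls through its own jemalloc support target.

import jemalloc // hypothetical module exposing the C mallctl API

// jemalloc caches its statistics; bumping the "epoch" control refreshes them.
// Only newp/newlen are required for the write, oldp/oldlenp may be nil.
func refreshJemallocStats() {
    var epoch: UInt64 = 1
    let result = mallctl("epoch", nil, nil, &epoch, MemoryLayout<UInt64>.size)
    if result != 0 {
        print("mallctl(epoch) returned \(result)")
    }
}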
@@ -22,6 +22,8 @@
let semaphore = DispatchSemaphore(value: 0)
var peakThreads: Int = 0
var peakThreadsRunning: Int = 0
var peakMemoryResident: Int = 0
var peakMemoryVirtual: Int = 0
var runState: RunState = .running
var sampleRate: Int = 10_000
var metrics: Set<BenchmarkMetric>?
@@ -89,11 +91,16 @@

func startSampling(_: Int = 10_000) { // sample rate in microseconds
#if os(macOS)
let sampleSemaphore = DispatchSemaphore(value: 0)

DispatchQueue.global(qos: .userInitiated).async {
self.lock.lock()
let rate = self.sampleRate
self.peakThreads = 0
self.peakThreadsRunning = 0
self.peakMemoryResident = 0
self.peakMemoryVirtual = 0

self.runState = .running
self.lock.unlock()

@@ -109,6 +116,14 @@
self.peakThreadsRunning = Int(procTaskInfo.pti_numrunning)
}

if procTaskInfo.pti_resident_size > self.peakMemoryResident {
self.peakMemoryResident = Int(procTaskInfo.pti_resident_size)
}

if procTaskInfo.pti_virtual_size > self.peakMemoryVirtual {
self.peakMemoryVirtual = Int(procTaskInfo.pti_virtual_size)
}

if self.runState == .shuttingDown {
self.runState = .done
self.semaphore.signal()
@@ -117,15 +132,18 @@
let quit = self.runState
self.lock.unlock()

sampleSemaphore.signal()

if quit == .done {
return
}

usleep(UInt32.random(in: UInt32(Double(rate) * 0.9) ... UInt32(Double(rate) * 1.1)))
}
}
// We'll sleep just a little bit to let the sampler thread get going so we don't get 0 samples
usleep(1_000)

// We'll need to wait for a single sample from the sampler thread so we don't get 0 samples
sampleSemaphore.wait()
#endif
}

@@ -154,13 +172,21 @@
let totalTime = userTime + systemTime
var threads = 0
var threadsRunning = 0
var peakResident = 0
var peakVirtual = 0

if metrics.contains(.threads) || metrics.contains(.threadsRunning) {
if metrics.contains(.threads) ||
metrics.contains(.threadsRunning) ||
metrics.contains(.peakMemoryResident) ||
metrics.contains(.peakMemoryVirtual) {
lock.lock()
threads = peakThreads
threadsRunning = peakThreadsRunning
peakResident = peakMemoryResident
peakVirtual = peakMemoryVirtual
lock.unlock()
}

var ioStats = IOStats()

if metrics.contains(.writeBytesPhysical) || metrics.contains(.writeBytesPhysical) {
@@ -170,8 +196,8 @@
let stats = OperatingSystemStats(cpuUser: userTime,
cpuSystem: systemTime,
cpuTotal: totalTime,
peakMemoryResident: Int(procTaskInfo.pti_resident_size),
peakMemoryVirtual: Int(procTaskInfo.pti_virtual_size),
peakMemoryResident: peakResident,
peakMemoryVirtual: peakVirtual,
syscalls: Int(procTaskInfo.pti_syscalls_unix) +
Int(procTaskInfo.pti_syscalls_mach),
contextSwitches: Int(procTaskInfo.pti_csw),
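
For context on the macOS producer above: the peak fields the sampler now tracks come from the same proc_taskinfo structure the old end-of-run snapshot used. A sketch of reading it via libproc, assuming proc_pidinfo is visible through import Darwin (the producer may wrap this differently).

#if os(macOS)
import Darwin

// One sample of the fields the macOS sampler keeps as running maxima.
func sampleTaskInfo() -> (resident: UInt64, virtual: UInt64)? {
    var info = proc_taskinfo()
    let size = Int32(MemoryLayout<proc_taskinfo>.size)
    let read = proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &info, size)
    guard read == size else { return nil }
    return (info.pti_resident_size, info.pti_virtual_size)
}
#endif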
@@ -22,6 +22,9 @@
let lock = NIOLock()
let semaphore = DispatchSemaphore(value: 0)
var peakThreads: Int = 0
var peakThreadsRunning: Int = 0
var peakMemoryResident: Int = 0
var peakMemoryVirtual: Int = 0
var sampleRate: Int = 10_000
var runState: RunState = .running
var metrics: Set<BenchmarkMetric>?
@@ -98,18 +101,39 @@
}

func makeOperatingSystemStats() -> OperatingSystemStats {
guard let metrics else {
return .init()
}

let ioStats = readIOStats()
let processStats = readProcessStats()

var threads = 0
var threadsRunning = 0
var peakResident = 0
var peakVirtual = 0

if metrics.contains(.threads) ||
metrics.contains(.threadsRunning) ||
metrics.contains(.peakMemoryResident) ||
metrics.contains(.peakMemoryVirtual) {
lock.lock()
threads = peakThreads
threadsRunning = peakThreadsRunning
peakResident = peakMemoryResident
peakVirtual = peakMemoryVirtual
lock.unlock()
}

return OperatingSystemStats(cpuUser: Int(processStats.cpuUser),
cpuSystem: Int(processStats.cpuSystem),
cpuTotal: Int(processStats.cpuTotal),
peakMemoryResident: Int(processStats.peakMemoryResident),
peakMemoryVirtual: Int(processStats.peakMemoryVirtual),
peakMemoryResident: peakResident,
peakMemoryVirtual: peakVirtual,
syscalls: 0,
contextSwitches: 0,
threads: Int(processStats.threads),
threadsRunning: 0, // we can go dig in /proc/self/task/ later if want this
threads: threads,
threadsRunning: threadsRunning, // we can go dig in /proc/self/task/ later if we want this
readSyscalls: Int(ioStats.readSyscalls),
writeSyscalls: Int(ioStats.writeSyscalls),
readBytesLogical: Int(ioStats.readBytesLogical),
@@ -132,11 +156,15 @@
}

func startSampling(_: Int = 10_000) { // sample rate in microseconds
let sampleSemaphore = DispatchSemaphore(value: 0)

DispatchQueue.global(qos: .userInitiated).async {
self.lock.lock()

let rate = self.sampleRate
self.peakThreads = 0
self.peakMemoryResident = 0
self.peakMemoryVirtual = 0
self.runState = .running

self.lock.unlock()
@@ -150,6 +178,14 @@
self.peakThreads = processStats.threads
}

if processStats.peakMemoryResident > self.peakMemoryResident {
self.peakMemoryResident = processStats.peakMemoryResident
}

if processStats.peakMemoryVirtual > self.peakMemoryVirtual {
self.peakMemoryVirtual = processStats.peakMemoryVirtual
}

if self.runState == .shuttingDown {
self.runState = .done
self.semaphore.signal()
@@ -159,15 +195,17 @@

self.lock.unlock()

sampleSemaphore.signal()

if quit == .done {
return
}

usleep(UInt32.random(in: UInt32(Double(rate) * 0.9) ... UInt32(Double(rate) * 1.1)))
}
}
// We'll sleep just a little bit to let the sampler thread get going so we try to avoid 0 samples
usleep(1_000)
// We'll need to wait for a single sample from the sampler thread so we don't get 0 samples
sampleSemaphore.wait()
}

func stopSampling() {
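
The Linux producer above gets the same fields from its readProcessStats(); the sampler now simply keeps their maxima. For illustration, one way to take a single sample from /proc/self/statm (whose first two fields are size and resident, in pages); this is a hedged sketch, not the producer's actual parser.

import Foundation
#if canImport(Glibc)
import Glibc
#endif

// Reads virtual and resident memory for the current process, in bytes.
func sampleMemoryOnce() -> (virtual: Int, resident: Int)? {
    guard let statm = try? String(contentsOfFile: "/proc/self/statm", encoding: .utf8) else {
        return nil
    }
    let fields = statm.split(separator: " ")
    guard fields.count >= 2,
          let virtualPages = Int(fields[0]),
          let residentPages = Int(fields[1]) else { return nil }
    let pageSize = Int(sysconf(Int32(_SC_PAGESIZE)))
    return (virtualPages * pageSize, residentPages * pageSize)
}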
