Skip to content

Commit

Permalink
redefine SIGHUP
Browse files Browse the repository at this point in the history
* to log memory stats, num goroutines, num open files
* for the latter, add platform dependent bits
* with refactoring

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Jan 30, 2025
1 parent 2620901 commit 7a54c91
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 12 deletions.
19 changes: 7 additions & 12 deletions hk/housekeeper.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
// Package hk provides mechanism for registering cleanup
// functions which are invoked at specified intervals.
/*
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
*/
package hk

import (
"container/heap"
"os"
"os/signal"
"syscall"
"time"

Expand Down Expand Up @@ -75,7 +74,7 @@ func _init(mustRun bool) {
if mustRun {
HK.running.Store(false)
} else {
HK.running.Store(true) // tests only
HK.running.Store(true) // mustRun == false: tests only
}
heap.Init(HK.actions)
}
Expand Down Expand Up @@ -121,11 +120,8 @@ func (hk *hk) terminate() {
func (*hk) Stop(error) { HK.stopCh.Close() }

func (hk *hk) Run() (err error) {
signal.Notify(hk.sigCh,
syscall.SIGINT, // kill -SIGINT (Ctrl-C)
syscall.SIGTERM, // kill -SIGTERM
syscall.SIGQUIT, // kill -SIGQUIT
)
hk.setSignal() // SIGINT, et al. - see handleSignal() below

hk.timer = time.NewTimer(time.Hour)
hk.running.Store(true)
err = hk._run()
Expand Down Expand Up @@ -198,10 +194,9 @@ func (hk *hk) _run() error {

case s, ok := <-hk.sigCh:
if ok {
signal.Stop(hk.sigCh)
err := cos.NewSignalError(s.(syscall.Signal))
hk.Stop(err)
return err
if err := hk.handleSignal(s.(syscall.Signal)); err != nil {
return err
}
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions hk/sig_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Package hk provides mechanism for registering cleanup
// functions which are invoked at specified intervals.
/*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*/
package hk

import "errors"

// numOpenFiles is the darwin placeholder: counting open file descriptors
// is not implemented on this platform, so it always reports zero together
// with a non-nil error.
func numOpenFiles() (n int, err error) {
	err = errors.New("num-open-files not implemented yet")
	return n, err
}
32 changes: 32 additions & 0 deletions hk/sig_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Package hk provides mechanism for registering cleanup
// functions which are invoked at specified intervals.
/*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*/
package hk

import (
"os"
"path/filepath"
"strconv"
)

// numOpenFiles returns the number of file descriptors currently open in this
// process, obtained by listing the entries of /proc/<pid>/fd.
//
// The directory handle opened to perform the listing is itself a descriptor of
// this process and therefore appears in the listing; it is excluded from the
// returned count. On any error the returned count is 0.
//
// TODO: consider moving to `cos` and logging (`stats`) every 4h or so
func numOpenFiles() (int, error) {
	procdir := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "fd")
	dir, err := os.Open(procdir)
	if err != nil {
		return 0, err
	}
	defer dir.Close()

	// read just the names (n <= 0: all entries in a single call)
	names, err := dir.Readdirnames(0)
	if err != nil {
		return 0, err
	}

	num := len(names)
	if num > 0 {
		num-- // do not count `dir` - the descriptor we opened above to do the listing
	}
	return num, nil
}
49 changes: 49 additions & 0 deletions hk/sigprod.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Package hk provides mechanism for registering cleanup
// functions which are invoked at specified intervals.
/*
* Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
*/
package hk

import (
"os/signal"
"runtime"
"strings"
"syscall"

"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/nlog"
"github.com/NVIDIA/aistore/sys"
)

// setSignal subscribes hk.sigCh to the signals processed by handleSignal:
// SIGHUP (logged, does not terminate) plus the three terminating signals
// SIGINT, SIGTERM, and SIGQUIT.
func (hk *hk) setSignal() {
	signal.Notify(
		hk.sigCh,
		// terminating
		syscall.SIGINT,  // Ctrl-C, or kill -SIGINT
		syscall.SIGTERM, // kill -SIGTERM
		syscall.SIGQUIT, // kill -SIGQUIT
		// non-terminating: handleSignal only writes a log record
		syscall.SIGHUP, // kill -SIGHUP
	)
}

// handleSignal processes a signal received on hk.sigCh.
//
// For any signal other than SIGHUP it stops further delivery to hk.sigCh,
// builds a signal error via cos.NewSignalError, passes it to hk.Stop, and
// returns it.
//
// For SIGHUP it writes a single info-level log record containing the number
// of goroutines, sys.NumCPU(), memory stats, and the number of open file
// descriptors (each of the latter two followed by its error value, if any),
// and returns nil.
func (hk *hk) handleSignal(s syscall.Signal) error {
	// terminating signals first
	if s != syscall.SIGHUP {
		signal.Stop(hk.sigCh)
		err := cos.NewSignalError(s)
		hk.Stop(err)
		return err
	}

	// SIGHUP: does not terminate - collect and log runtime info
	numGoroutines := runtime.NumGoroutine()

	var (
		memStat sys.MemStat
		memStr  strings.Builder
	)
	memErr := memStat.Get()
	memStat.Str(&memStr)

	numFDs, fdErr := numOpenFiles()

	nlog.Infoln("ngr [", numGoroutines, sys.NumCPU(), "] mem [", memStr.String(), memErr, "]", "num-fd [", numFDs, fdErr, "]")
	return nil
}

0 comments on commit 7a54c91

Please sign in to comment.