Skip to content

Commit

Permalink
Added signal handler in sub go routines.
Browse files Browse the repository at this point in the history
  • Loading branch information
XuechunHou committed Dec 3, 2024
1 parent 206f0d1 commit 90a9278
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
1 change: 1 addition & 0 deletions cmd/ops_agent_uap_wrapper/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func main() {

ctx := context.Background()
log.Println("Starting Ops Agent UAP Plugin")
//ps.Start(ctx, &pb.StartRequest{Config: &pb.StartRequest_Config{StateDirectoryPath: "/var/log/google-cloud-ops-agent"}})
ps.Start(ctx, &pb.StartRequest{})
for {
status, _ := ps.GetStatus(ctx, &pb.GetStatusRequest{})
Expand Down
23 changes: 15 additions & 8 deletions cmd/ops_agent_uap_wrapper/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func (ps *OpsAgentPluginServer) Cancel() {
// interface notification of the "exiting" state.
func sigHandler(ctx context.Context, cancel func(sig os.Signal)) {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGHUP, syscall.SIGKILL)
signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGKILL)
go func() {
select {
case sig := <-sigChan:
Expand Down Expand Up @@ -119,22 +119,27 @@ func (ps *OpsAgentPluginServer) runAgent(ctx context.Context) {
}

var wg sync.WaitGroup

cancelOnSignal := func(_ os.Signal) {
// We're handling an external signal here, so set cleanup to [false].
// If this were the Guest Agent trying to stop the plugin, it would call the
// [Stop] RPC directly or send a [SIGKILL], which cannot be intercepted anyway.
ps.Stop(ctx, &pb.StopRequest{Cleanup: false})
}
// Starting Diagnostics Service
execDiagnosticsCmd := exec.CommandContext(ctx,
Prefix+"/libexec/google_cloud_ops_agent_diagnostics",
"-config", Sysconfdir+"/google-cloud-ops-agent/config.yaml",
)
wg.Add(1)
go restartCommand(ctx, &wg, ps.logger, execDiagnosticsCmd)
go restartCommand(ctx, &wg, ps.logger, execDiagnosticsCmd, cancelOnSignal)

// Starting Otel
execOtelCmd := exec.CommandContext(ctx,
Prefix+"/subagents/opentelemetry-collector/otelopscol",
"--config", OtelRuntimeDirectory+"/otel.yaml",
)
wg.Add(1)
go restartCommand(ctx, &wg, ps.logger, execOtelCmd)
go restartCommand(ctx, &wg, ps.logger, execOtelCmd, cancelOnSignal)

// Starting FluentBit
execFluentBitCmd := exec.CommandContext(ctx,
Expand All @@ -147,7 +152,7 @@ func (ps *OpsAgentPluginServer) runAgent(ctx context.Context) {
"--storage_path", FluentBitStateDiectory+"/buffers",
)
wg.Add(1)
go restartCommand(ctx, &wg, ps.logger, execFluentBitCmd)
go restartCommand(ctx, &wg, ps.logger, execFluentBitCmd, cancelOnSignal)
wg.Wait()
ps.logger.Infof("wait group has exited")
}
Expand Down Expand Up @@ -220,7 +225,7 @@ func runCommand(cmd *exec.Cmd, logger logs.StructuredLogger) error {
return nil
}

func restartCommand(ctx context.Context, wg *sync.WaitGroup, logger logs.StructuredLogger, cmd *exec.Cmd) {
func restartCommand(ctx context.Context, wg *sync.WaitGroup, logger logs.StructuredLogger, cmd *exec.Cmd, cancel func(sig os.Signal) ) {
defer wg.Done()
if cmd == nil {
return
Expand All @@ -230,6 +235,8 @@ func restartCommand(ctx context.Context, wg *sync.WaitGroup, logger logs.Structu
logger.Warnf("Context has been cancelled, exiting")
return
}
// Register the signal handler and pass it the cancel callback.
sigHandler(ctx, cancel)
cmd.SysProcAttr = &syscall.SysProcAttr{
Pdeathsig: syscall.SIGKILL,

Check failure on line 241 in cmd/ops_agent_uap_wrapper/service.go

View workflow job for this annotation

GitHub Actions / test (windows-latest)

unknown field Pdeathsig in struct literal of type "syscall".SysProcAttr

Check failure on line 241 in cmd/ops_agent_uap_wrapper/service.go

View workflow job for this annotation

GitHub Actions / test (windows-latest)

unknown field Pdeathsig in struct literal of type "syscall".SysProcAttr
Setpgid: true,

Check failure on line 242 in cmd/ops_agent_uap_wrapper/service.go

View workflow job for this annotation

GitHub Actions / test (windows-latest)

unknown field Setpgid in struct literal of type "syscall".SysProcAttr

Check failure on line 242 in cmd/ops_agent_uap_wrapper/service.go

View workflow job for this annotation

GitHub Actions / test (windows-latest)

unknown field Setpgid in struct literal of type "syscall".SysProcAttr
Expand All @@ -255,9 +262,9 @@ func restartCommand(ctx context.Context, wg *sync.WaitGroup, logger logs.Structu
}
// Sleep 5 seconds before restarting the task
time.Sleep(5 * time.Second)
cmdToRestart := exec.CommandContext(ctx, cmd.Path, cmd.Args...)
cmdToRestart := exec.CommandContext(ctx, cmd.Path, cmd.Args[1:]...)
wg.Add(1)
go restartCommand(ctx, wg, logger, cmdToRestart)
go restartCommand(ctx, wg, logger, cmdToRestart, cancel)
}

func CreateOpsAgentUapPluginLogger(logDir string) logs.StructuredLogger {
Expand Down

0 comments on commit 90a9278

Please sign in to comment.