From 9770696f7f792eae35f3122c8c0a88f3e5ed59b5 Mon Sep 17 00:00:00 2001 From: Stephen Fox Jr Date: Mon, 20 Mar 2023 17:43:52 -0400 Subject: [PATCH] logging: Added more debugs. Make debug default. We can switch the log level back to the default once we feel a bit more comfortable with the application's stability. --- internal/app/app.go | 14 +++- internal/auditd/sessiontracker.go | 89 ++++++++++++++++++++++- internal/integration_tests/helper_test.go | 16 ++-- internal/integration_tests/ubuntu_test.go | 2 +- internal/journald/processentry.go | 14 ++++ main.go | 8 +- 6 files changed, 121 insertions(+), 22 deletions(-) diff --git a/internal/app/app.go b/internal/app/app.go index 2522c033..558f2e25 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -11,6 +11,7 @@ import ( "github.com/metal-toolbox/auditevent" "github.com/metal-toolbox/auditevent/helpers" "go.uber.org/zap" + "go.uber.org/zap/zapcore" "golang.org/x/sync/errgroup" "github.com/metal-toolbox/audito-maldito/internal/auditd" @@ -31,10 +32,11 @@ OPTIONS var logger *zap.SugaredLogger -func Run(ctx context.Context, osArgs []string, h *common.Health, newLoggerFn func() (*zap.Logger, error)) error { +func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig *zap.Config) error { var bootID string var auditlogpath string var auditLogDirPath string + logLevel := zapcore.DebugLevel // TODO: Switch default back to zapcore.ErrorLevel. flagSet := flag.NewFlagSet(osArgs[0], flag.ContinueOnError) @@ -42,6 +44,7 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, newLoggerFn fun flagSet.StringVar(&bootID, "boot-id", "", "Optional Linux boot ID to use when reading from the journal") flagSet.StringVar(&auditlogpath, "audit-log-path", "/app-audit/audit.log", "Path to the audit log file") flagSet.StringVar(&auditLogDirPath, "audit-dir-path", "/var/log/audit", "Path to the Linux audit log directory") + flagSet.Var(&logLevel, "log-level", "Set the log level according to zapcore.Level") flagSet.Usage = func() { os.Stderr.WriteString(usage) flagSet.PrintDefaults() @@ -53,7 +56,14 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, newLoggerFn fun return err } - l, err := newLoggerFn() + if optLoggerConfig == nil { + cfg := zap.NewProductionConfig() + optLoggerConfig = &cfg + } + + optLoggerConfig.Level = zap.NewAtomicLevelAt(logLevel) + + l, err := optLoggerConfig.Build() if err != nil { return err } diff --git a/internal/auditd/sessiontracker.go b/internal/auditd/sessiontracker.go index b3792000..e45cacb0 100644 --- a/internal/auditd/sessiontracker.go +++ b/internal/auditd/sessiontracker.go @@ -52,6 +52,12 @@ type sessionTracker struct { } func (o *sessionTracker) remoteLogin(rul common.RemoteUserLogin) error { + var debugLogger *zap.SugaredLogger + if logger.Level() == zap.DebugLevel { + debugLogger = logger.With("RemoteUserLogin", rul) + debugLogger.Debugln("new remote user login") + } + err := rul.Validate() if err != nil { return &sessionTrackerError{ @@ -62,8 +68,17 @@ func (o *sessionTracker) remoteLogin(rul common.RemoteUserLogin) error { } // Check if there is an auditd session for this login. - for _, u := range o.sessIDsToUsers { + for asi, u := range o.sessIDsToUsers { if u.srcPID == rul.PID { + if debugLogger != nil { + debugLogger.With( + "auditSessionID", asi, + "auditSessionStartTime", u.added, + "numCachedAuditEvents", len(u.cached), + "hasRUL", u.hasRUL). + Debugln("found existing audit session for remote user login") + } + u.hasRUL = true u.login = rul @@ -71,6 +86,10 @@ func (o *sessionTracker) remoteLogin(rul common.RemoteUserLogin) error { } } + if debugLogger != nil { + debugLogger.Debugln("no matching audit session found") + } + _, hasIt := o.pidsToRULs[rul.PID] if hasIt { logger.Warnf("got a remote user login with a pid that already exists in the map (%d)", @@ -93,15 +112,29 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { return nil } - u, hasSession := o.sessIDsToUsers[event.Session] + var debugLogger *zap.SugaredLogger if logger.Level() == zap.DebugLevel { - logger.Debugf("user: %s | session: %s | has-session: %t | event-type: %s | summary: %v", - event.User.Names, event.Session, hasSession, event.Type, event.Summary) + debugLogger = logger.With( + "auditEvent", *event, + "auditEventType", event.Type.String(), + "auditSessionID", event.Session) + debugLogger.Debugln("new audit event") } + + u, hasSession := o.sessIDsToUsers[event.Session] + //nolint:nestif // Refactor later. if hasSession { // Either write the audit event to the event writer // or cache it for later. + if debugLogger != nil { + debugLogger.With( + "auditSessionStartTime", u.added, + "numCachedAuditEvents", len(u.cached), + "hasRUL", u.hasRUL). + Debugln("found existing audit session for audit event") + } + if u.hasRUL { // It looks like AUDIT_CRED_DISP indicates the // canonical end of a user session - but, there is @@ -135,6 +168,10 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { // Create a new audit session. if event.Type != auparse.AUDIT_LOGIN { + if debugLogger != nil { + debugLogger.Debugln("skipping creation of new audit session for audit event") + } + // It appears AUDIT_LOGIN indicates the // canonical start of a user session. // At least, it is the event type @@ -143,6 +180,10 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { return nil } + if debugLogger != nil { + debugLogger.Debugln("creating new audit session for audit event") + } + srcPID, err := strconv.Atoi(event.Process.PID) if err != nil { return &sessionTrackerError{ @@ -159,6 +200,10 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { } if rul, hasRUL := o.pidsToRULs[srcPID]; hasRUL { + if debugLogger != nil { + debugLogger.Debugln("found existing remote user login for new audit session") + } + delete(o.pidsToRULs, srcPID) u.hasRUL = true @@ -177,6 +222,14 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { return nil } + + if debugLogger != nil { + debugLogger.Debugln("no existing remote user login for new audit session") + } + } + + if debugLogger != nil { + debugLogger.Debugln("caching audit event") } // Cache the event if the audit session does not have @@ -188,16 +241,44 @@ func (o *sessionTracker) auditdEvent(event *aucoalesce.Event) error { } func (o *sessionTracker) deleteUsersWithoutLoginsBefore(t time.Time) { + var debugLogger *zap.SugaredLogger + if logger.Level() == zap.DebugLevel { + debugLogger = logger.With( + "cacheCleanup", "deleteUsersWithoutLoginsBefore", + "before", t.String()) + } + for id, u := range o.sessIDsToUsers { if !u.hasRUL && u.added.Before(t) { + if debugLogger != nil { + debugLogger.With( + "auditSessionID", id, + "auditSessionStartTime", u.added.String()). + Debugln("removing unused audit session") + } + delete(o.sessIDsToUsers, id) } } } func (o *sessionTracker) deleteRemoteUserLoginsBefore(t time.Time) { + var debugLogger *zap.SugaredLogger + if logger.Level() == zap.DebugLevel { + debugLogger = logger.With( + "cacheCleanup", "deleteRemoteUserLoginsBefore", + "before", t.String()) + } + for pid, userLogin := range o.pidsToRULs { if userLogin.Source.LoggedAt.Before(t) { + if debugLogger != nil { + debugLogger.With( + "pid", pid, + "source", *userLogin.Source). + Debugln("removing unused remote user login") + } + delete(o.pidsToRULs, pid) } } diff --git a/internal/integration_tests/helper_test.go b/internal/integration_tests/helper_test.go index e2ab6b12..55f74aa2 100644 --- a/internal/integration_tests/helper_test.go +++ b/internal/integration_tests/helper_test.go @@ -24,16 +24,16 @@ import ( var ( debug *log.Logger +) - zapLoggerFn = func() (*zap.Logger, error) { - config := zap.NewDevelopmentConfig() - config.EncoderConfig = zap.NewDevelopmentEncoderConfig() - config.DisableStacktrace = true - config.Level = zap.NewAtomicLevelAt(zap.ErrorLevel) +func zapLoggerConfig() *zap.Config { + config := zap.NewDevelopmentConfig() + config.EncoderConfig = zap.NewDevelopmentEncoderConfig() + config.DisableStacktrace = true + config.Level = zap.NewAtomicLevelAt(zap.ErrorLevel) - return config.Build() - } -) + return &config +} // setupSSHCertAccess generates a SSH CA and a private key for the current // user. It then issues an SSH certificate for the user's key pair. diff --git a/internal/integration_tests/ubuntu_test.go b/internal/integration_tests/ubuntu_test.go index b58d84df..344e53b5 100644 --- a/internal/integration_tests/ubuntu_test.go +++ b/internal/integration_tests/ubuntu_test.go @@ -77,7 +77,7 @@ func TestSSHCertLoginAndExecStuff_Ubuntu(t *testing.T) { appErrs := make(chan error, 1) go func() { - appErrs <- app.Run(ctx, []string{"audito-maldito"}, appHealth, zapLoggerFn) + appErrs <- app.Run(ctx, []string{"audito-maldito"}, appHealth, zapLoggerConfig()) }() err := appHealth.WaitForReadyCtxOrTimeout(ctx, time.Minute) diff --git a/internal/journald/processentry.go b/internal/journald/processentry.go index 5e503823..73c43647 100644 --- a/internal/journald/processentry.go +++ b/internal/journald/processentry.go @@ -214,12 +214,26 @@ func processAcceptPublicKeyEntry(config *processEntryConfig) error { evt = evt.WithData(ed) } + var debugLogger *zap.SugaredLogger + if logger.Level() == zap.DebugLevel { + debugLogger = logger.With("eventPID", config.pid) + debugLogger.Debugln("writing event to auditevent writer...") + } + if err := config.eventW.Write(evt); err != nil { // NOTE(jaosorior): Not being able to write audit events // merits us panicking here. return fmt.Errorf("failed to write event: %w", err) } + if debugLogger != nil { + debugLogger.Debugln("writing event to remote user logins channel...") + + defer func() { + debugLogger.Debugln("finished writing event to remote user logins channel") + }() + } + select { case <-config.ctx.Done(): return nil diff --git a/main.go b/main.go index 813e2267..8f66325b 100644 --- a/main.go +++ b/main.go @@ -10,8 +10,6 @@ import ( "os/signal" "syscall" - "go.uber.org/zap" - "github.com/metal-toolbox/audito-maldito/internal/app" "github.com/metal-toolbox/audito-maldito/internal/common" ) @@ -27,9 +25,5 @@ func mainWithError() error { ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() - newLoggerFn := func() (*zap.Logger, error) { - return zap.NewProduction() - } - - return app.Run(ctx, os.Args, common.NewHealth(), newLoggerFn) + return app.Run(ctx, os.Args, common.NewHealth(), nil) }