From 6fb9961c74da55d1d270621770b2483b3dfddc1c Mon Sep 17 00:00:00 2001
From: Christy Jacob <christyjacob4@gmail.com>
Date: Fri, 20 Dec 2024 22:20:24 +0530
Subject: [PATCH 1/2] fix: incident creation and resolution logic

---
 docker-compose.yml |   9 +-
 logger.go          |  69 ++++++++++
 main.go            | 325 +++++++++++++++++++++++----------------------
 3 files changed, 236 insertions(+), 167 deletions(-)
 create mode 100644 logger.go

diff --git a/docker-compose.yml b/docker-compose.yml
index fd91496..de1d2cf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,16 +1,15 @@
-version: '3.8'
-
 services:
   monitoring:
     build:
       context: .
       dockerfile: Dockerfile
+    hostname: monitoring-local
     command:
       - monitoring
       - "--url=${BETTER_STACK_URL}"
-      - "--interval=10"
-      - "--cpu-limit=90"
-      - "--memory-limit=80"
+      - "--interval=5"  # seconds between checks
+      - "--cpu-limit=5"  # NOTE(review): 5% looks like a local-testing threshold; confirm before using outside local runs
+      - "--memory-limit=10"  # NOTE(review): 10% looks like a local-testing threshold; confirm before using outside local runs
       - "--disk-limit=85"
     volumes:
       - /:/host:ro
diff --git a/logger.go b/logger.go
new file mode 100644
index 0000000..45c20e4
--- /dev/null
+++ b/logger.go
@@ -0,0 +1,84 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"time"
+)
+
+// ANSI escape sequences used to colorize log lines by severity.
+const (
+	colorReset  = "\033[0m"
+	colorRed    = "\033[31m"
+	colorGreen  = "\033[32m"
+	colorYellow = "\033[33m"
+	colorBlue   = "\033[34m"
+	colorPurple = "\033[35m"
+	colorCyan   = "\033[36m"
+)
+
+// Logger writes timestamped, level-tagged lines to standard output, most of
+// them wrapped in an ANSI color chosen by severity.
+type Logger struct {
+	logger *log.Logger
+}
+
+// New returns a Logger that writes to os.Stdout. The wrapped log.Logger has no
+// prefix or flags because formatMessage supplies the timestamp itself.
+func New() *Logger {
+	return &Logger{
+		logger: log.New(os.Stdout, "", 0),
+	}
+}
+
+// formatMessage expands format with args and prefixes the result with the
+// current local time and the level tag: "2006-01-02 15:04:05 [LEVEL] message".
+func (l *Logger) formatMessage(level, format string, args ...interface{}) string {
+	timestamp := time.Now().Format("2006-01-02 15:04:05")
+	message := fmt.Sprintf(format, args...)
+	return fmt.Sprintf("%s [%s] %s", timestamp, level, message)
+}
+
+// Log writes an uncolored line tagged LOG.
+func (l *Logger) Log(format string, args ...interface{}) {
+	msg := l.formatMessage("LOG", format, args...)
+	l.logger.Printf("%s", msg)
+}
+
+// Success writes a green line tagged SUCCESS.
+func (l *Logger) Success(format string, args ...interface{}) {
+	msg := l.formatMessage("SUCCESS", format, args...)
+	l.logger.Printf("%s%s%s", colorGreen, msg, colorReset)
+}
+
+// Warn writes a yellow line tagged WARNING.
+func (l *Logger) Warn(format string, args ...interface{}) {
+	msg := l.formatMessage("WARNING", format, args...)
+	l.logger.Printf("%s%s%s", colorYellow, msg, colorReset)
+}
+
+// Error writes a red line tagged ERROR.
+func (l *Logger) Error(format string, args ...interface{}) {
+	msg := l.formatMessage("ERROR", format, args...)
+	l.logger.Printf("%s%s%s", colorRed, msg, colorReset)
+}
+
+// Info writes a blue line tagged INFO.
+func (l *Logger) Info(format string, args ...interface{}) {
+	msg := l.formatMessage("INFO", format, args...)
+	l.logger.Printf("%s%s%s", colorBlue, msg, colorReset)
+}
+
+// Debug writes a cyan line tagged DEBUG.
+func (l *Logger) Debug(format string, args ...interface{}) {
+	msg := l.formatMessage("DEBUG", format, args...)
+	l.logger.Printf("%s%s%s", colorCyan, msg, colorReset)
+}
+
+// Fatal writes a purple line tagged FATAL and then exits the process with
+// status 1 (log.Logger.Fatalf calls os.Exit, so deferred functions do not run).
+func (l *Logger) Fatal(format string, args ...interface{}) {
+	msg := l.formatMessage("FATAL", format, args...)
+	l.logger.Fatalf("%s%s%s", colorPurple, msg, colorReset)
+}
diff --git a/main.go b/main.go
index 4d63b86..94afcd2 100644
--- a/main.go
+++ b/main.go
@@ -4,7 +4,7 @@ import (
 	"encoding/json"
 	"flag"
 	"fmt"
-	"log"
+	"io"
 	"net/http"
 	"os"
 	"path/filepath"
@@ -16,23 +16,25 @@ import (
 	"github.com/shirou/gopsutil/v3/mem"
 )
 
-type Incident struct {
-	Title     string `json:"title"`
-	Cause     string `json:"cause"`
-	AlertID   string `json:"alert_id"`
-	Timestamp int64  `json:"timestamp"`
-	Resolved  bool   `json:"resolved,omitempty"`
+type Metric struct { // JSON payload POSTed to the BetterStack URL for a single check
+	Title     string  `json:"title"`     // human-readable name, e.g. "CPU Usage - <hostname>"
+	Cause     string  `json:"cause"`     // fixed description of the check that produced the reading
+	AlertID   string  `json:"alert_id"`  // "<resource>-<hostname>"
+	Timestamp int64   `json:"timestamp"` // Unix seconds taken when the metric was built
+	Status    string  `json:"status"`    // "pass" or "fail", as returned by getStatus
+	Value     float64 `json:"value"`     // measured usage percentage
+	Limit     float64 `json:"limit"`     // configured threshold percentage the value was compared against
 }
 
 type SystemMonitor struct {
-	httpClient    *http.Client
-	incidents     map[string][]Incident
+	httpClient     *http.Client // client used to POST metrics; built with a 5-second timeout
 	betterStackURL string
-	hostname      string
-	cpuLimit      float64
-	memoryLimit   float64
-	diskLimit     float64
-	interval      int
+	hostname       string  // embedded in every metric title and alert ID
+	cpuLimit       float64 // CPU usage threshold, in percent
+	memoryLimit    float64 // memory usage threshold, in percent
+	diskLimit      float64 // disk usage threshold, in percent (root and each /mnt/* entry)
+	interval       int     // seconds between check rounds
+	log            *Logger // destination for check results and errors
 }
 
 func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit, diskLimit float64) (*SystemMonitor, error) {
@@ -45,21 +47,17 @@ func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit
 		httpClient: &http.Client{
 			Timeout: 5 * time.Second,
 		},
-		incidents: map[string][]Incident{
-			"cpu":    {},
-			"memory": {},
-			"disk":   {},
-		},
 		betterStackURL: betterStackURL,
-		hostname:      hostname,
-		cpuLimit:      cpuLimit,
-		memoryLimit:   memoryLimit,
-		diskLimit:     diskLimit,
-		interval:      interval,
+		hostname:       hostname,
+		cpuLimit:       cpuLimit,
+		memoryLimit:    memoryLimit,
+		diskLimit:      diskLimit,
+		interval:       interval,
+		log:            New(),
 	}, nil
 }
 
-func (s *SystemMonitor) evaluateCPUIncident() (*Incident, error) {
+func (s *SystemMonitor) checkCPU() error {
 	duration := float64(s.interval) / 10
 	if duration < 5 {
 		duration = 5
@@ -70,119 +68,149 @@ func (s *SystemMonitor) evaluateCPUIncident() (*Incident, error) {
 
 	cpuPercent, err := cpu.Percent(time.Duration(duration)*time.Second, false)
 	if err != nil {
-		return nil, fmt.Errorf("failed to get CPU usage: %v", err)
+		return fmt.Errorf("failed to get CPU usage: %v", err)
 	}
 
 	if len(cpuPercent) == 0 {
-		return nil, nil
+		return nil
 	}
 
-	log.Printf("CPU usage: %.2f%%\n", cpuPercent[0])
-	if cpuPercent[0] > s.cpuLimit {
-		return &Incident{
-			Title:     fmt.Sprintf("CPU usage higher than %.0f%%! - %s", s.cpuLimit, s.hostname),
-			Cause:     "High CPU usage",
-			AlertID:   fmt.Sprintf("high-cpu-%s", s.hostname),
-			Timestamp: time.Now().Unix(),
-		}, nil
+	value := cpuPercent[0]
+	status := s.getStatus(value, s.cpuLimit)
+	if status == "fail" {
+		s.log.Warn("CPU usage %.2f%% exceeds limit of %.2f%%", value, s.cpuLimit)
+	} else {
+		s.log.Log("CPU usage: %.2f%% (limit: %.2f%%)", value, s.cpuLimit)
+	}
+	
+	metric := Metric{
+		Title:     fmt.Sprintf("CPU Usage - %s", s.hostname),
+		Cause:     "CPU monitoring check",
+		AlertID:   fmt.Sprintf("cpu-%s", s.hostname),
+		Timestamp: time.Now().Unix(),
+		Status:    status,
+		Value:     value,
+		Limit:     s.cpuLimit,
 	}
 
-	return nil, nil
+	return s.sendMetric(metric)
 }
 
-func (s *SystemMonitor) evaluateMemoryIncident() (*Incident, error) {
+func (s *SystemMonitor) checkMemory() error { // samples virtual-memory usage, logs it and sends it as a Metric
 	vmStat, err := mem.VirtualMemory()
 	if err != nil {
-		return nil, fmt.Errorf("failed to get memory stats: %v", err)
+		return fmt.Errorf("failed to get memory stats: %v", err)
 	}
 
-	log.Printf("Memory usage: %.2f%% (Available: %d MB, Total: %d MB)\n",
-		vmStat.UsedPercent,
-		vmStat.Available/(1024*1024),
-		vmStat.Total/(1024*1024))
+	value := vmStat.UsedPercent
+	status := s.getStatus(value, s.memoryLimit) // "fail" only when usage is strictly above the limit
+	if status == "fail" {
+		s.log.Warn("Memory usage %.2f%% exceeds limit of %.2f%%", value, s.memoryLimit)
+	} else {
+		s.log.Log("Memory usage: %.2f%% (limit: %.2f%%), Available: %d MB, Total: %d MB",
+			value,
+			s.memoryLimit,
+			vmStat.Available/(1024*1024),
+			vmStat.Total/(1024*1024))
+	}
 
-	if vmStat.UsedPercent > s.memoryLimit {
-		return &Incident{
-			Title:     fmt.Sprintf("Memory usage higher than %.0f%%! - %s", s.memoryLimit, s.hostname),
-			Cause:     "High memory usage",
-			AlertID:   fmt.Sprintf("high-memory-%s", s.hostname),
-			Timestamp: time.Now().Unix(),
-		}, nil
+	metric := Metric{ // built on every call, whether the status is "pass" or "fail"
+		Title:     fmt.Sprintf("Memory Usage - %s", s.hostname),
+		Cause:     "Memory monitoring check",
+		AlertID:   fmt.Sprintf("memory-%s", s.hostname),
+		Timestamp: time.Now().Unix(),
+		Status:    status,
+		Value:     value,
+		Limit:     s.memoryLimit,
 	}
 
-	return nil, nil
+	return s.sendMetric(metric) // the send error, if any, is the function's result
 }
 
-func (s *SystemMonitor) evaluateDiskIncident() ([]Incident, error) {
-	var incidents []Incident
-
+func (s *SystemMonitor) checkDisk() error { // reports usage of "/" and of every /mnt/* entry as separate Metrics
 	// Check root partition
 	usage, err := disk.Usage("/")
 	if err != nil {
-		return nil, fmt.Errorf("failed to get disk usage: %v", err)
+		return fmt.Errorf("failed to get disk usage: %v", err)
 	}
 
-	log.Printf("Diskspace used /: %.2f%% (Free: %d MB, Total: %d MB)\n",
-		usage.UsedPercent,
-		usage.Free/(1024*1024),
-		usage.Total/(1024*1024))
+	value := usage.UsedPercent
+	status := s.getStatus(value, s.diskLimit) // "fail" only when usage is strictly above the limit
+	if status == "fail" {
+		s.log.Warn("Root disk usage %.2f%% exceeds limit of %.2f%%", value, s.diskLimit)
+	} else {
+		s.log.Log("Root disk usage: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB",
+			value,
+			s.diskLimit,
+			usage.Free/(1024*1024),
+			usage.Total/(1024*1024))
+	}
 
-	if usage.UsedPercent > s.diskLimit {
-		incidents = append(incidents, Incident{
-			Title:     fmt.Sprintf("Root disk usage higher than %.0f%%! - %s", s.diskLimit, s.hostname),
-			Cause:     "High disk usage",
-			AlertID:   fmt.Sprintf("high-disk-%s", s.hostname),
-			Timestamp: time.Now().Unix(),
-		})
+	if err := s.sendMetric(Metric{ // sent on every call, pass or fail
+		Title:     fmt.Sprintf("Root Disk Usage - %s", s.hostname),
+		Cause:     "Disk monitoring check",
+		AlertID:   fmt.Sprintf("disk-root-%s", s.hostname),
+		Timestamp: time.Now().Unix(),
+		Status:    status,
+		Value:     value,
+		Limit:     s.diskLimit,
+	}); err != nil {
+		return err // a failed root-disk send returns before any /mnt/* entry is examined
 	}
 
 	// Check mounted directories
 	mounts, err := filepath.Glob("/mnt/*")
 	if err != nil {
-		return nil, fmt.Errorf("failed to list mounted directories: %v", err)
+		return fmt.Errorf("failed to list mounted directories: %v", err)
 	}
 
 	for _, mount := range mounts {
 		usage, err := disk.Usage(mount)
 		if err != nil {
-			log.Printf("Failed to get disk usage for %s: %v\n", mount, err)
+			s.log.Error("Failed to get disk usage for %s: %v", mount, err) // unreadable entry: logged, then skipped
 			continue
 		}
 
-		log.Printf("Diskspace used %s: %.2f%% (Free: %d MB, Total: %d MB)\n",
-			mount,
-			usage.UsedPercent,
-			usage.Free/(1024*1024),
-			usage.Total/(1024*1024))
+		value := usage.UsedPercent
+		status := s.getStatus(value, s.diskLimit) // every mount is compared against the same diskLimit
+		if status == "fail" {
+			s.log.Warn("Disk usage for %s %.2f%% exceeds limit of %.2f%%", mount, value, s.diskLimit)
+		} else {
+			s.log.Log("Disk usage for %s: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB",
+				mount,
+				value,
+				s.diskLimit,
+				usage.Free/(1024*1024),
+				usage.Total/(1024*1024))
+		}
 
-		if usage.UsedPercent > s.diskLimit {
-			incidents = append(incidents, Incident{
-				Title:     fmt.Sprintf("%s disk usage higher than %.0f%%! - %s", mount, s.diskLimit, s.hostname),
-				Cause:     "High disk usage",
-				AlertID:   fmt.Sprintf("high-disk-%s", s.hostname),
-				Timestamp: time.Now().Unix(),
-			})
+		if err := s.sendMetric(Metric{
+			Title:     fmt.Sprintf("Disk Usage %s - %s", mount, s.hostname),
+			Cause:     "Disk monitoring check",
+			AlertID:   fmt.Sprintf("disk-%s-%s", filepath.Base(mount), s.hostname), // NOTE(review): /mnt/root would yield the root partition's ID "disk-root-<hostname>"
+			Timestamp: time.Now().Unix(),
+			Status:    status,
+			Value:     value,
+			Limit:     s.diskLimit,
+		}); err != nil {
+			return err // NOTE(review): a failed send ends the loop, so later mounts are not checked this round; confirm intended
 		}
 	}
 
-	return incidents, nil
-}
-
-func (s *SystemMonitor) createIncident(incident Incident) error {
-	log.Printf("Triggering incident: %s\n", incident.Title)
-	return s.sendIncident(incident)
+	return nil
 }
 
-func (s *SystemMonitor) resolveIncident(incident Incident) error {
-	log.Printf("Resolving incident: %s\n", incident.Title)
-	incident.Resolved = true
-	return s.sendIncident(incident)
+func (s *SystemMonitor) getStatus(value, limit float64) string { // classifies a reading against its threshold
+	if value > limit { // strictly greater: a value equal to the limit still passes
+		return "fail"
+	}
+	return "pass"
 }
 
-func (s *SystemMonitor) sendIncident(incident Incident) error {
-	body, err := json.Marshal(incident)
+func (s *SystemMonitor) sendMetric(metric Metric) error {
+	body, err := json.Marshal(metric)
 	if err != nil {
-		return fmt.Errorf("failed to marshal incident: %v", err)
+		return fmt.Errorf("failed to marshal metric: %v", err)
 	}
 
 	req, err := http.NewRequest(http.MethodPost, s.betterStackURL, strings.NewReader(string(body)))
@@ -190,8 +218,9 @@ func (s *SystemMonitor) sendIncident(incident Incident) error {
 		return fmt.Errorf("failed to create request: %v", err)
 	}
 
-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", "Appwrite system-monitoring")
+	req.Header.Set("Content-Type", "application/json; charset=utf-8")
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", "Appwrite Resource Monitoring")
 
 	resp, err := s.httpClient.Do(req)
 	if err != nil {
@@ -199,52 +228,20 @@ func (s *SystemMonitor) sendIncident(incident Incident) error {
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode >= 400 {
-		return fmt.Errorf("request failed with status: %d", resp.StatusCode)
-	}
-
-	return nil
-}
-
-func (s *SystemMonitor) processType(monitorType string, evaluate func() (interface{}, error)) error {
-	incidents, err := evaluate()
+	// Read response body
+	respBody, err := io.ReadAll(resp.Body)
 	if err != nil {
-		return fmt.Errorf("failed to evaluate %s: %v", monitorType, err)
-	}
-
-	if incidents == nil {
-		if len(s.incidents[monitorType]) > 0 {
-			log.Printf("Resolving active incident of type %s\n", monitorType)
-			for _, incident := range s.incidents[monitorType] {
-				if err := s.resolveIncident(incident); err != nil {
-					log.Printf("Failed to resolve incident: %v\n", err)
-				}
-			}
-			s.incidents[monitorType] = nil
-		}
-		return nil
+		return fmt.Errorf("failed to read response body: %v", err)
 	}
 
-	if len(s.incidents[monitorType]) > 0 {
-		log.Printf("Already have active incident of type '%s', skipping.\n", monitorType)
-		return nil
+	// Log response details without colors
+	s.log.Log("Response Status: %s", resp.Status)
+	if len(respBody) > 0 {
+		s.log.Log("Response Body: %s", string(respBody))
 	}
 
-	switch i := incidents.(type) {
-	case *Incident:
-		if i != nil {
-			if err := s.createIncident(*i); err != nil {
-				return fmt.Errorf("failed to create incident: %v", err)
-			}
-			s.incidents[monitorType] = []Incident{*i}
-		}
-	case []Incident:
-		for _, incident := range i {
-			if err := s.createIncident(incident); err != nil {
-				return fmt.Errorf("failed to create incident: %v", err)
-			}
-		}
-		s.incidents[monitorType] = i
+	if resp.StatusCode >= 400 {
+		return fmt.Errorf("request failed with status: %d, body: %s", resp.StatusCode, string(respBody))
 	}
 
 	return nil
@@ -254,29 +251,33 @@ func (s *SystemMonitor) Start() {
 	ticker := time.NewTicker(time.Duration(s.interval) * time.Second)
 	defer ticker.Stop()
 
+	// Initial check
+	s.runChecks()
+
+	// Periodic checks
 	for range ticker.C {
-		if err := s.processType("cpu", func() (interface{}, error) {
-			return s.evaluateCPUIncident()
-		}); err != nil {
-			log.Printf("Error processing CPU metrics: %v\n", err)
-		}
+		s.runChecks()
+	}
+}
 
-		if err := s.processType("memory", func() (interface{}, error) {
-			return s.evaluateMemoryIncident()
-		}); err != nil {
-			log.Printf("Error processing memory metrics: %v\n", err)
-		}
+func (s *SystemMonitor) runChecks() { // runs the CPU, memory and disk checks once, in that order
+	if err := s.checkCPU(); err != nil {
+		s.log.Error("Error checking CPU: %v", err) // logged only; the remaining checks still run
+	}
 
-		if err := s.processType("memory", func() (interface{}, error) {
-			return s.evaluateMemoryIncident()
-		}); err != nil {
-			log.Printf("Error processing memory metrics: %v\n", err)
-		}
+	if err := s.checkMemory(); err != nil {
+		s.log.Error("Error checking memory: %v", err) // logged only; the disk check still runs
+	}
+
+	if err := s.checkDisk(); err != nil {
+		s.log.Error("Error checking disk: %v", err) // nothing is returned to the caller
 	}
 }
 
 func main() {
-	// Define command line flags
+	log := New()
+
+	// Command line flags
 	betterStackURL := flag.String("url", "", "BetterStack webhook URL (required)")
 	interval := flag.Int("interval", 300, "Check interval in seconds (default: 300)")
 	cpuLimit := flag.Float64("cpu-limit", 90.0, "CPU usage threshold percentage (default: 90)")
@@ -294,33 +295,33 @@ func main() {
 	// Validate required flags
 	if *betterStackURL == "" {
 		flag.Usage()
-		log.Fatal("Error: BetterStack webhook URL is required")
+		log.Fatal("BetterStack webhook URL is required")
 	}
 
 	// Validate ranges
 	if *interval <= 0 {
-		log.Fatal("Error: interval must be greater than 0")
+		log.Fatal("Interval must be greater than 0")
 	}
 	if *cpuLimit < 0 || *cpuLimit > 100 {
-		log.Fatal("Error: cpu-limit must be between 0 and 100")
+		log.Fatal("CPU limit must be between 0 and 100")
 	}
 	if *memoryLimit < 0 || *memoryLimit > 100 {
-		log.Fatal("Error: memory-limit must be between 0 and 100")
+		log.Fatal("Memory limit must be between 0 and 100")
 	}
 	if *diskLimit < 0 || *diskLimit > 100 {
-		log.Fatal("Error: disk-limit must be between 0 and 100")
+		log.Fatal("Disk limit must be between 0 and 100")
 	}
 
 	monitor, err := NewSystemMonitor(*betterStackURL, *interval, *cpuLimit, *memoryLimit, *diskLimit)
 	if err != nil {
-		log.Fatalf("Failed to create system monitor: %v", err)
+		log.Fatal("Failed to create system monitor: %v", err)
 	}
 
-	log.Printf("Starting monitoring with settings:")
-	log.Printf("- Check interval: %d seconds", *interval)
-	log.Printf("- CPU limit: %.1f%%", *cpuLimit)
-	log.Printf("- Memory limit: %.1f%%", *memoryLimit)
-	log.Printf("- Disk limit: %.1f%%", *diskLimit)
+	log.Info("Starting monitoring with settings:")
+	log.Info("- Check interval: %d seconds", *interval)
+	log.Info("- CPU limit: %.1f%%", *cpuLimit)
+	log.Info("- Memory limit: %.1f%%", *memoryLimit)
+	log.Info("- Disk limit: %.1f%%", *diskLimit)
 
 	monitor.Start()
 } 
\ No newline at end of file

From 7788758373825b4c4a591315e9d861c72e68ac85 Mon Sep 17 00:00:00 2001
From: Christy Jacob <christyjacob4@gmail.com>
Date: Fri, 20 Dec 2024 22:25:21 +0530
Subject: [PATCH 2/2] fix: incident creation and resolution

---
 main.go | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/main.go b/main.go
index 94afcd2..8365faa 100644
--- a/main.go
+++ b/main.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"flag"
 	"fmt"
-	"io"
 	"net/http"
 	"os"
 	"path/filepath"
@@ -228,20 +227,9 @@ func (s *SystemMonitor) sendMetric(metric Metric) error {
 	}
 	defer resp.Body.Close()
 
-	// Read response body
-	respBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return fmt.Errorf("failed to read response body: %v", err)
-	}
-
-	// Log response details without colors
 	s.log.Log("Response Status: %s", resp.Status)
-	if len(respBody) > 0 {
-		s.log.Log("Response Body: %s", string(respBody))
-	}
-
 	if resp.StatusCode >= 400 {
-		return fmt.Errorf("request failed with status: %d, body: %s", resp.StatusCode, string(respBody))
+		return fmt.Errorf("request failed with status: %d", resp.StatusCode)
 	}
 
 	return nil