diff --git a/README.md b/README.md index 3daf112b..e2b59b41 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ within the pod. Each result is color coded for quicker debugging. Stern can also use the [GELF](https://docs.graylog.org/en/3.2/pages/gelf.html) format and send the logs to a remote GELF capable logging solution like Graylog. -When `graylog-server` is specified, it will not output the log messages except +When `graylog-server` is specified, it will not output the log messages except discovery messages to stdout. We implement the GELF TCP messsage delivery. You can configure count of retries when connection is initialized to your logging server. Stern will try to resend non deliverable GELF messages every 15 seconds for 10 minutes before giving up delivery. The specified `context` is used as host in the -GELF message. +GELF message. The query is a regular expression so the pod name can easily be filtered and you don't need to specify the exact id (for instance omitting the deployment @@ -49,31 +49,34 @@ The `pod` query is a regular expression so you could provide `"web-\w"` to tail ### cli flags -| flag | default | purpose | -|----------------------|---------------------|--------------------------------------------------------------------------------------------------------------------------| -| `--container` | `.*` | Container name when multiple containers in pod (regular expression) | -| `--exclude-container`| | Container name to exclude when multiple containers in pod (regular expression) | -| `--container-state` | `[running,waiting]` | If present, tail containers with status in running, waiting or terminated. | -| `--timestamps` | | Print timestamps | -| `--since` | | Return logs newer than a relative duration like 52, 2m, or 3h. Displays all if omitted | -| `--context` | | Kubernetes context to use. Default to `kubectl config current-context` | -| `--cluster` | | Kubernetes cluster identifier. Will override contextname as host in GELF message. 
| -| `--exclude` | | Log lines to exclude; specify multiple with additional `--exclude`; (regular expression) | -| `--include` | | Log lines to include; specify multiple with additional `--include`; (regular expression) | -| `--init-containers` | `true` | Include init containers | -| `--namespace` | | Kubernetes namespace to use (multiple namespaces comma seperated). Default to namespace configured in Kubernetes context | -| `--kubeconfig` | `~/.kube/config` | Path to kubeconfig file to use | -| `--all-namespaces` | | If present, tail across all namespaces. A specific namespace is ignored even if specified with --namespace. | -| `--selector` | | Selector (label query) to filter on. If present, default to `.*` for the pod-query. | -| `--tail` | `-1` | The number of lines from the end of the logs to show. Defaults to -1, showing all logs. | -| `--color` | `auto` | Force set color output. `auto`: colorize if tty attached, `always`: always colorize, `never`: never colorize | -| `--output` | `default` | Specify predefined template. Currently support: [default, raw, json] See templates section | -| `--graylog-server` | | Specify Graylog Server address to send logs to via GELF (format: address:port) | -| `--graylog-retries` | `100` | Specify Graylog Server connection retries | -| `--client-timeout` | `3600` | Specify Kubernetes watch client timeout in seconds | -| `--exit-after` | | Specify after how much seconds the program will exit (time duration like 24h, 30m, or 2h). Will not exit if unspecified. 
| -| `template` | | Template to use for log lines, leave empty to use --output flag | - +| flag | default | purpose | +| --------------------- | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| `--container` | `.*` | Container name when multiple containers in pod (regular expression) | +| `--exclude-container` | | Container name to exclude when multiple containers in pod (regular expression) | +| `--container-state` | `[running,waiting]` | If present, tail containers with status in running, waiting or terminated. | +| `--timestamps` | | Print timestamps | +| `--since` | | Return logs newer than a relative duration like 5s, 2m, or 3h. Displays all if omitted | +| `--context` | | Kubernetes context to use. Default to `kubectl config current-context` | +| `--cluster` | | Kubernetes cluster identifier. Will override contextname as host in GELF message. | +| `--exclude` | | Log lines to exclude; specify multiple with additional `--exclude`; (regular expression) | +| `--include` | | Log lines to include; specify multiple with additional `--include`; (regular expression) | +| `--init-containers` | `true` | Include init containers | +| `--namespace` | | Kubernetes namespace to use (multiple namespaces comma separated). Default to namespace configured in Kubernetes context | +| `--kubeconfig` | `~/.kube/config` | Path to kubeconfig file to use | +| `--all-namespaces` | | If present, tail across all namespaces. A specific namespace is ignored even if specified with --namespace. | +| `--selector` | | Selector (label query) to filter on. If present, default to `.*` for the pod-query. | +| `--tail` | `-1` | The number of lines from the end of the logs to show. Defaults to -1, showing all logs. | +| `--color` | `auto` | Force set color output. 
`auto`: colorize if tty attached, `always`: always colorize, `never`: never colorize | +| `--output` | `default` | Specify predefined template. Currently support: [default, raw, json] See templates section | +| `--graylog-server` | | Specify Graylog Server address to send logs to via GELF (format: address:port) | +| `--graylog-retries` | `100` | Specify Graylog Server connection retries | +| `--graylog-transport` | `tcp` | Specify Graylog GELF transport protocol tcp/http/https | +| `--graylog-port` | `12201` | Specify Graylog GELF transport port | +| `--graylog-cacerts` | `/etc/ssl/certs/ca-certificates.crt` | Specify CA Certs file for Graylog Server https endpoint | +| `--graylog-insecure` | `false` | Do not verify Graylog server certificate | +| `--client-timeout` | `3600` | Specify Kubernetes watch client timeout in seconds | +| `--exit-after` | | Specify after how much seconds the program will exit (time duration like 24h, 30m, or 2h). Will not exit if unspecified. | +| `template` | | Template to use for log lines, leave empty to use --output flag | See `stern --help` for details @@ -83,11 +86,11 @@ used. ### templates -stern supports outputting custom log messages. There are a few predefined +stern supports outputting custom log messages. There are a few predefined templates which you can use by specifying the `--output` flag: | output | description | -|-----------|-------------------------------------------------------------------------------------------------------| +| --------- | ----------------------------------------------------------------------------------------------------- | | `default` | Displays the namespace, pod and container, and decorates it with color depending on --color | | `raw` | Only outputs the log message itself, useful when your logs are json and you want to pipe them to `jq` | | `json` | Marshals the log struct to json. Useful for programmatic purposes | @@ -97,7 +100,7 @@ compiled to a Go template and then used for every log message. 
This Go template will receive the following struct: | property | type | description | -|-----------------|--------|-----------------------------------------------| +| --------------- | ------ | --------------------------------------------- | | `Message` | string | The log message itself | | `Namespace` | string | The namespace of the pod | | `PodName` | string | The name of the pod | @@ -108,57 +111,67 @@ The following functions are available within the template (besides the [builtin functions](https://golang.org/pkg/text/template/#hdr-Functions)): | func | arguments | description | -|---------|-----------------------|-----------------------------------------------------------------| +| ------- | --------------------- | --------------------------------------------------------------- | | `json` | `object` | Marshal the object and output it as a json text | | `color` | `color.Color, string` | Wrap the text in color (.ContainerColor and .PodColor provided) | If Graylog server address is specified, stern will not: + - print out messages except discovery of containers - colorize the output ## Examples: Tail the `gateway` container running inside of the `envvars` pod on staging + ``` stern envvars --context staging --container gateway ``` Tail the `staging` namespace excluding logs from `istio-proxy` container + ``` stern -n staging --exclude-container istio-proxy . ``` Show auth activity from 15min ago with timestamps + ``` stern auth -t --since 15m ``` Follow the development of `some-new-feature` in minikube + ``` stern some-new-feature --context minikube ``` View pods from another namespace + ``` stern kubernetes-dashboard --namespace kube-system ``` Tail the pods filtered by `run=nginx` label selector across all namespaces + ``` stern --all-namespaces -l run=nginx ``` Follow the `frontend` pods in canary release + ``` stern frontend --selector release=canary ``` Pipe the log message to jq: + ``` stern backend -o json | jq . 
``` Only output the log message itself: + ``` stern backend -o raw ``` @@ -175,12 +188,18 @@ Output using a custom template with stern-provided colors: stern --template '{{.Message}} ({{.Namespace}}/{{color .PodColor .PodName}}/{{color .ContainerColor .ContainerName}})' backend ``` -Output all messages to Graylog server +Send all messages to Graylog server via TCP ``` stern ".*" --graylog-server 127.0.0.1:12201 --graylog-retries 50 --all-namespaces ``` +Send all messages to Graylog server via HTTPS + +``` +stern ".*" --graylog-server example.org --graylog-transport https --graylog-port 443 --graylog-retries 50 --all-namespaces +``` + ## Completion Stern supports command-line auto completion for bash or zsh. `stern @@ -213,7 +232,7 @@ source <(stern --completion=zsh) ## Contributing to this repository -Derdanne welcomes contributions to this repository from anyone. Please see +Derdanne welcomes contributions to this repository from anyone. Please see [CONTRIBUTING](CONTRIBUTING.md) for details. -This module based on Wercker Holding BV "Stern" from https://github.com/wercker/stern. \ No newline at end of file +This module is based on Wercker Holding BV "Stern" from https://github.com/wercker/stern. 
diff --git a/cmd/cli.go b/cmd/cli.go index c4db985f..665a2bb8 100644 --- a/cmd/cli.go +++ b/cmd/cli.go @@ -63,6 +63,7 @@ type Options struct { graylogRetries int graylogTransport string graylogPort string + graylogCacerts string graylogInsecure bool clientTimeout int64 exitAfter time.Duration @@ -79,6 +80,7 @@ var opts = &Options{ graylogRetries: 100, graylogTransport: "tcp", graylogPort: "12201", + graylogCacerts: "/etc/ssl/certs/ca-certificates.crt", graylogInsecure: false, clientTimeout: 3600, exitAfter: 0, @@ -115,6 +117,7 @@ func Run() { cmd.Flags().IntVarP(&opts.graylogRetries, "graylog-retries", "r", opts.graylogRetries, "Specify Graylog Server connection retries") cmd.Flags().StringVar(&opts.graylogTransport, "graylog-transport", opts.graylogTransport, "Specify Graylog GELF transport protocol tcp/http/https") cmd.Flags().StringVar(&opts.graylogPort, "graylog-port", opts.graylogPort, "Specify Graylog GELF transport port") + cmd.Flags().StringVar(&opts.graylogCacerts, "graylog-cacerts", opts.graylogCacerts, "Specify CA Certs file for Graylog Server https endpoint") cmd.Flags().BoolVar(&opts.graylogInsecure, "graylog-insecure", opts.graylogInsecure, "Do not verify Graylog server certificate") cmd.Flags().Int64VarP(&opts.clientTimeout, "client-timeout", "T", opts.clientTimeout, "Specify Kubernetes watch client timeout in seconds") cmd.Flags().DurationVarP(&opts.exitAfter, "exit-after", "X", opts.exitAfter, "Specify after how much time the program will exit. 
Default is not to exit") @@ -325,6 +328,7 @@ func parseConfig(args []string) (*stern.Config, error) { GraylogRetries: opts.graylogRetries, GraylogTransport: opts.graylogTransport, GraylogPort: opts.graylogPort, + GraylogCacerts: opts.graylogCacerts, GraylogInsecure: opts.graylogInsecure, ClientTimeout: opts.clientTimeout, ExitAfter: opts.exitAfter, diff --git a/stern/config.go b/stern/config.go index cda88ff8..12431480 100644 --- a/stern/config.go +++ b/stern/config.go @@ -45,6 +45,7 @@ type Config struct { GraylogRetries int GraylogTransport string GraylogPort string + GraylogCacerts string GraylogInsecure bool ClientTimeout int64 ExitAfter time.Duration diff --git a/stern/main.go b/stern/main.go index f6402073..18b1e53f 100644 --- a/stern/main.go +++ b/stern/main.go @@ -16,9 +16,12 @@ package stern import ( "context" + "crypto/x509" "fmt" + "io/ioutil" "math/rand" "os" + "strconv" "strings" "sync" "time" @@ -31,6 +34,47 @@ import ( // Run starts the main run loop func Run(ctx context.Context, config *Config) error { var gelfWriter *gelf.TCPWriter + var caCertPool *x509.CertPool + + if config.GraylogServer != "" { + + if config.GraylogTransport == "tcp" { + var writerErr error + var sleep time.Duration = time.Second * 10 + + for { + gelfWriter, writerErr = gelf.NewTCPWriter(config.GraylogServer + ":" + config.GraylogPort) + if writerErr != nil { + if config.GraylogRetries--; config.GraylogRetries > 0 { + // Add some randomness to prevent creating a Thundering Herd + jitter := time.Duration(rand.Int63n(int64(sleep))) + sleep = (sleep + jitter/2) + timeNow := time.Now().Format("2006/01/02 15:04:05") + os.Stderr.WriteString(fmt.Sprintf(timeNow+" Could not connect to Graylog Server, next retry in %s. "+strconv.Itoa(config.GraylogRetries)+" retries left. 
\n", sleep.Round(time.Second))) + time.Sleep(sleep) + gelfWriter = nil + writerErr = nil + continue + } else { + return errors.Wrap(writerErr, "Setup GELF TCP writer failed") + } + } else { + break + } + } + gelfWriter.MaxReconnect = 30 + gelfWriter.ReconnectDelay = 5 + } + + if config.GraylogTransport == "https" { + caCert, err := ioutil.ReadFile(config.GraylogCacerts) + if err != nil { + panic(err) + } + caCertPool = x509.NewCertPool() + caCertPool.AppendCertsFromPEM(caCert) + } + } rand.Seed(time.Now().UnixNano()) clientTimeoutSeconds := int64(config.ClientTimeout) @@ -159,12 +203,13 @@ OUTER: GraylogRetries: config.GraylogRetries, GraylogTransport: config.GraylogTransport, GraylogPort: config.GraylogPort, + GraylogCacerts: config.GraylogCacerts, GraylogInsecure: config.GraylogInsecure, }) tailsMutex.Lock() tails[id] = tail tailsMutex.Unlock() - tail.Start(ctx, clientset.CoreV1().Pods(p.Namespace), gelfWriter, logC) + tail.Start(ctx, clientset.CoreV1().Pods(p.Namespace), gelfWriter, caCertPool, logC) } }() diff --git a/stern/tail.go b/stern/tail.go index e74beeff..c7cacc24 100644 --- a/stern/tail.go +++ b/stern/tail.go @@ -9,12 +9,9 @@ import ( "encoding/json" "fmt" "hash/fnv" - "io/ioutil" - "math/rand" "net/http" "os" "regexp" - "strconv" "text/template" "time" @@ -56,6 +53,7 @@ type TailOptions struct { GraylogRetries int GraylogTransport string GraylogPort string + GraylogCacerts string GraylogInsecure bool } @@ -143,18 +141,11 @@ func (t *Tail) buildMessage(full string, level int32, customExtras map[string]in } // sendGelfMessageHttp sends a GELF message via HTTP to the graylog server -func (t *Tail) sendGelfMessageHttp(gm *gelf.Message) error { +func (t *Tail) sendGelfMessageHttp(gm *gelf.Message, caCertPool *x509.CertPool) error { var httpClient = &http.Client{Timeout: time.Duration(10) * time.Second} var tlsConfig = &tls.Config{} if t.Options.GraylogTransport == "https" { - caCert, err := ioutil.ReadFile("/etc/ssl/certs/ca-certificates.crt") - if err != 
nil { - panic(err) - } - caCertPool := x509.NewCertPool() - caCertPool.AppendCertsFromPEM(caCert) - tlsConfig = &tls.Config{ RootCAs: caCertPool, InsecureSkipVerify: t.Options.GraylogInsecure, @@ -246,40 +237,9 @@ func (t *Tail) sendGelfMessageTcp(gm *gelf.Message, gelfWriter *gelf.TCPWriter) } // Start starts tailing -func (t *Tail) Start(ctx context.Context, i v1.PodInterface, gelfWriter *gelf.TCPWriter, logC chan<- string) { +func (t *Tail) Start(ctx context.Context, i v1.PodInterface, gelfWriter *gelf.TCPWriter, caCertPool *x509.CertPool, logC chan<- string) { t.podColor, t.containerColor = determineColor(t.PodName) - if t.Options.GraylogServer != "" && t.Options.GraylogTransport == "tcp" { - - var writerErr error - var sleep time.Duration = time.Second * 10 - - for { - gelfWriter, writerErr = gelf.NewTCPWriter(t.Options.GraylogServer + ":" + t.Options.GraylogPort) - if writerErr != nil { - if t.Options.GraylogRetries--; t.Options.GraylogRetries > 0 { - // Add some randomness to prevent creating a Thundering Herd - jitter := time.Duration(rand.Int63n(int64(sleep))) - sleep = (sleep + jitter/2) - timeNow := time.Now().Format("2006/01/02 15:04:05") - os.Stderr.WriteString(fmt.Sprintf(timeNow+" Could not connect to Graylog Server, next retry in %s. "+strconv.Itoa(t.Options.GraylogRetries)+" retries left. 
\n", sleep.Round(time.Second))) - time.Sleep(sleep) - gelfWriter = nil - writerErr = nil - continue - } else { - os.Stderr.WriteString(fmt.Sprintf("Setup GELF TCP writer failed: %s", writerErr.Error())) - return - } - } else { - break - } - } - gelfWriter.MaxReconnect = 30 - gelfWriter.ReconnectDelay = 5 - } - - // start tailing logs go func() { defer close(t.closed) @@ -399,7 +359,7 @@ func (t *Tail) Start(ctx context.Context, i v1.PodInterface, gelfWriter *gelf.TC gm := t.buildMessage(logLine, 3, customExtras, host) if t.Options.GraylogTransport != "tcp" { - if err := t.sendGelfMessageHttp(gm); err != nil { + if err := t.sendGelfMessageHttp(gm, caCertPool); err != nil { os.Stderr.WriteString(fmt.Sprintf("Error sending GELF message: %s", err)) } } else {