From e32d2fc937b78c773909e26ab1a6cf840dfca543 Mon Sep 17 00:00:00 2001 From: liguozhuang Date: Fri, 1 Nov 2024 17:18:53 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=B9=E5=99=A8=E6=97=A5=E5=BF=97=E9=87=87?= =?UTF-8?q?=E9=9B=86=E6=94=AF=E6=8C=81=20Annotation/Label=20=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=20from=5Fbeginning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/export/doc/en/inputs/container-log.md | 2 ++ internal/export/doc/zh/inputs/container-log.md | 2 ++ internal/plugins/inputs/container/const.go | 3 +++ internal/plugins/inputs/container/container_log.go | 1 + internal/plugins/inputs/container/env.go | 10 ++++++++++ internal/plugins/inputs/container/input.go | 1 + internal/plugins/inputs/container/log.go | 1 + 7 files changed, 20 insertions(+) diff --git a/internal/export/doc/en/inputs/container-log.md b/internal/export/doc/en/inputs/container-log.md index 0132888654..a7e396b827 100644 --- a/internal/export/doc/en/inputs/container-log.md +++ b/internal/export/doc/en/inputs/container-log.md @@ -44,6 +44,7 @@ If you want to customize the collection configuration, it can be done through ad "service" : "", "pipeline": "", "remove_ansi_escape_codes": false, + "from_beginning" : false, "tags" : { "" : "" } @@ -62,6 +63,7 @@ Field explanations: | `service` | string | The service to which the logs belong. The default value is the log source (`source`). | | `pipeline` | string | The Pipeline script for processing the logs. The default value is the script name that matches the log source (`.p`). | | `remove_ansi_escape_codes` | true/false | Enable ANSI codes removal. | +| `from_beginning` | true/false | Whether to collect logs from the beginning of the file. | | `multiline_match` | regular expression string | The pattern used for recognizing the first line of a [multiline log match](logging.md#multiline), e.g., `"multiline_match":"^\\d{4}"` indicates that the first line starts with four digits. 
In regular expression rules, `\d` represents a digit, and the preceding `\` is used for escaping. | | `character_encoding` | string | The character encoding. If the encoding is incorrect, the data may not be viewable. Supported values are `utf-8`, `utf-16le`, `utf-16le`, `gbk`, `gb18030`, or an empty string. The default is empty. | | `tags` | key/value pairs | Additional tags to be added. If there are duplicate keys, the value in this configuration will take precedence ([:octicons-tag-24: Version-1.4.6](../datakit/changelog.md#cl-1.4.6)). | diff --git a/internal/export/doc/zh/inputs/container-log.md b/internal/export/doc/zh/inputs/container-log.md index 94183015d9..ee723a2fec 100644 --- a/internal/export/doc/zh/inputs/container-log.md +++ b/internal/export/doc/zh/inputs/container-log.md @@ -45,6 +45,7 @@ Datakit 支持采集 Kubernetes 和主机容器日志,从数据来源上,可 "service" : "", "pipeline": "", "remove_ansi_escape_codes": false, + "from_beginning" : false, "tags" : { "" : "" } @@ -63,6 +64,7 @@ Datakit 支持采集 Kubernetes 和主机容器日志,从数据来源上,可 | `service` | 字符串 | 日志隶属的服务,默认值为日志来源(source) | | `pipeline` | 字符串 | 适用该日志的 Pipeline 脚本,默认值为与日志来源匹配的脚本名(`.p`) | | `remove_ansi_escape_codes` | true/false | 是否删除日志数据的颜色字符 | +| `from_beginning` | true/false | 是否从文件首部采集日志 | | `multiline_match` | 正则表达式字符串 | 用于[多行日志匹配](logging.md#multiline)时的首行识别,例如 `"multiline_match":"^\\d{4}"` 表示行首是 4 个数字,在正则表达式规则中 `\d` 是数字,前面的 `\` 是用来转义 | | `character_encoding` | 字符串 | 选择编码,如果编码有误会导致数据无法查看,支持 `utf-8`, `utf-16le`, `utf-16le`, `gbk`, `gb18030` or ""。默认为空即可 | | `tags` | key/value 键值对 | 添加额外的 tags,如果已经存在同名的 key 将以此为准([:octicons-tag-24: Version-1.4.6](../datakit/changelog.md#cl-1.4.6) ) | diff --git a/internal/plugins/inputs/container/const.go b/internal/plugins/inputs/container/const.go index c730cab88d..61ad1151f3 100644 --- a/internal/plugins/inputs/container/const.go +++ b/internal/plugins/inputs/container/const.go @@ -64,6 +64,9 @@ const sampleCfg = ` ## Removes ANSI escape codes from text strings. 
logging_remove_ansi_escape_codes = false + ## Whether to collect logs from the begin of the file. + logging_file_from_beginning = false + ## Search logging interval, default "60s" #logging_search_interval = "" diff --git a/internal/plugins/inputs/container/container_log.go b/internal/plugins/inputs/container/container_log.go index 907968ff3b..3209d1f06b 100644 --- a/internal/plugins/inputs/container/container_log.go +++ b/internal/plugins/inputs/container/container_log.go @@ -60,6 +60,7 @@ func (c *container) tailingLogs(ins *logInstance) { tailer.WithMaxMultilineLifeDuration(c.ipt.LoggingMaxMultilineLifeDuration), tailer.WithRemoveAnsiEscapeCodes(cfg.RemoveAnsiEscapeCodes || c.ipt.LoggingRemoveAnsiEscapeCodes), tailer.WithMaxForceFlushLimit(c.ipt.LoggingForceFlushLimit), + tailer.WithFromBeginning(cfg.FromBeginning || c.ipt.LoggingFileFromBeginning), tailer.WithFileFromBeginningThresholdSize(int64(c.ipt.LoggingFileFromBeginningThresholdSize)), tailer.WithIgnoreDeadLog(defaultActiveDuration), tailer.WithDone(done), diff --git a/internal/plugins/inputs/container/env.go b/internal/plugins/inputs/container/env.go index 6e62e4fda6..e08081c422 100644 --- a/internal/plugins/inputs/container/env.go +++ b/internal/plugins/inputs/container/env.go @@ -48,6 +48,8 @@ func (ipt *Input) GetENVDoc() []*inputs.ENVInfo { {FieldName: "LoggingAutoMultilineExtraPatterns", ENVName: "LOGGING_AUTO_MULTILINE_EXTRA_PATTERNS_JSON", ConfField: "logging_auto_multiline_extra_patterns", Type: doc.JSON, Default: `For more default rules, see [doc](logging.md#auto-multiline)`, Example: `["^\\d{4}-\\d{2}", "^[A-Za-z_]"]`, Desc: `Automatic multi-line pattern pattens list for log collection, supporting manual configuration of multiple multi-line rules`, DescZh: `日志采集的自动多行模式 pattens 列表,支持手动配置多个多行规则`}, {FieldName: "LoggingMaxMultilineLifeDuration", Type: doc.TimeDuration, Default: `3s`, Desc: `Maximum single multi-row life cycle of log collection. 
At the end of this cycle, existing multi-row data will be emptied and uploaded to avoid accumulation`, DescZh: `日志采集的单次多行最大生命周期,此周期结束将清空和上传现存的多行数据,避免堆积`}, {FieldName: "LoggingRemoveAnsiEscapeCodes", Type: doc.Boolean, Default: `false`, Desc: "Remove `ansi` escape codes and color characters, referred to [`ansi-decode` doc](logging.md#ansi-decode)", DescZh: `日志采集删除包含的颜色字符,详见[日志特殊字符处理说明](logging.md#ansi-decode)`}, + {FieldName: "LoggingFileFromBeginningThresholdSize", Type: doc.Int, Default: `20,000,000`, Desc: "Decide whether or not to from_beginning based on the file size, if the file size is smaller than this value when the file is found, start the collection from the begin", DescZh: `根据文件 size 决定是否 from_beginning,如果发现该文件时,文件 size 小于这个值,就使用 from_beginning 从头部开始采集`}, + {FieldName: "LoggingFileFromBeginning", Type: doc.Boolean, Default: `false`, Desc: "Whether to collect logs from the begin of the file", DescZh: `是否从文件首部采集日志`}, {FieldName: "LoggingForceFlushLimit", Type: doc.Int, Default: `5`, Desc: `If there are consecutive N empty collections, the existing data will be uploaded to prevent memory occupation caused by accumulated`, DescZh: `日志采集上传限制,如果连续 N 次都采集为空,会将现有的数据上传,避免数据积攒占用内存`}, {FieldName: "ContainerMaxConcurrent", Type: doc.Int, Default: `cpu cores + 1`, Desc: `Maximum number of concurrency when collecting container data, recommended to be turned on only when the collection delay is large`, DescZh: `采集容器数据时的最大并发数,推荐只在采集延迟较大时开启`}, {FieldName: "DisableCollectKubeJob", Type: doc.Boolean, Default: `false`, Desc: `Turn off collection of Kubernetes Job resources (including metrics data and object data)`, DescZh: `关闭对 Kubernetes Job 资源的采集(包括指标数据和对象数据)`}, @@ -93,6 +95,7 @@ func (ipt *Input) GetENVDoc() []*inputs.ENVInfo { // ENV_INPUT_CONTAINER_LOGGING_AUTO_MULTILINE_DETECTION: booler // ENV_INPUT_CONTAINER_LOGGING_AUTO_MULTILINE_EXTRA_PATTERNS_JSON : string (JSON string array) // ENV_INPUT_CONTAINER_LOGGING_MAX_MULTILINE_LIFE_DURATION : string ("5s") +// 
ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING : booler // ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE : int // ENV_INPUT_CONTAINER_LOGGING_REMOVE_ANSI_ESCAPE_CODES : booler // ENV_INPUT_CONTAINER_TAGS : "a=b,c=d". @@ -315,6 +318,13 @@ func (ipt *Input) ReadEnv(envs map[string]string) { ipt.LoggingMaxMultilineLifeDuration = dur } } + if str, ok := envs["ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING"]; ok { + if b, err := strconv.ParseBool(str); err != nil { + l.Warnf("parse ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING to bool: %s, ignore", err) + } else { + ipt.LoggingFileFromBeginning = b + } + } if str, ok := envs["ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE"]; ok { if size, err := strconv.ParseInt(str, 10, 64); err != nil { l.Warnf("parse ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE to int64: %s, ignore", err) diff --git a/internal/plugins/inputs/container/input.go b/internal/plugins/inputs/container/input.go index a49cacf67e..ca142130c7 100644 --- a/internal/plugins/inputs/container/input.go +++ b/internal/plugins/inputs/container/input.go @@ -63,6 +63,7 @@ type Input struct { DeprecatedLoggingMinFlushInterval time.Duration `toml:"logging_min_flush_interval"` LoggingForceFlushLimit int `toml:"logging_force_flush_limit"` LoggingMaxMultilineLifeDuration time.Duration `toml:"logging_max_multiline_life_duration"` + LoggingFileFromBeginning bool `toml:"logging_file_from_beginning"` LoggingFileFromBeginningThresholdSize int `toml:"logging_file_from_beginning_threshold_size"` LoggingRemoveAnsiEscapeCodes bool `toml:"logging_remove_ansi_escape_codes"` diff --git a/internal/plugins/inputs/container/log.go b/internal/plugins/inputs/container/log.go index f294179822..ed2346ebe5 100644 --- a/internal/plugins/inputs/container/log.go +++ b/internal/plugins/inputs/container/log.go @@ -28,6 +28,7 @@ type logConfig struct { Pipeline string `json:"pipeline"` Multiline string `json:"multiline_match"` 
RemoveAnsiEscapeCodes bool `json:"remove_ansi_escape_codes"` + FromBeginning bool `json:"from_beginning"` Tags map[string]string `json:"tags"` MultilinePatterns []string `json:"-"`