Skip to content

Commit

Permalink
Merge branch 'iss2443-add-frombeginning-for-container-annotations' in…
Browse files Browse the repository at this point in the history
…to 'dev'

容器日志采集支持 Annotation/Label 配置 from_beginning

See merge request cloudcare-tools/datakit!3267
  • Loading branch information
谭彪 committed Nov 1, 2024
2 parents 685b089 + e32d2fc commit 9f48465
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 0 deletions.
2 changes: 2 additions & 0 deletions internal/export/doc/en/inputs/container-log.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ If you want to customize the collection configuration, it can be done through ad
"service" : "<your-service>",
"pipeline": "<your-pipeline.p>",
"remove_ansi_escape_codes": false,
"from_beginning" : false,
"tags" : {
"<some-key>" : "<some_other_value>"
}
Expand All @@ -62,6 +63,7 @@ Field explanations:
| `service` | string | The service to which the logs belong. The default value is the log source (`source`). |
| `pipeline` | string | The Pipeline script for processing the logs. The default value is the script name that matches the log source (`<source>.p`). |
| `remove_ansi_escape_codes` | true/false | Enable ANSI codes removal. |
| `from_beginning` | true/false | Whether to collect logs from the beginning of the file. |
| `multiline_match` | regular expression string | The pattern used for recognizing the first line of a [multiline log match](logging.md#multiline), e.g., `"multiline_match":"^\\d{4}"` indicates that the first line starts with four digits. In regular expression rules, `\d` represents a digit, and the preceding `\` is used for escaping. |
| `character_encoding` | string | The character encoding. If the encoding is incorrect, the data may not be viewable. Supported values are `utf-8`, `utf-16le`, `utf-16be`, `gbk`, `gb18030`, or an empty string. The default is empty. |
| `tags` | key/value pairs | Additional tags to be added. If there are duplicate keys, the value in this configuration will take precedence ([:octicons-tag-24: Version-1.4.6](../datakit/changelog.md#cl-1.4.6)). |
Expand Down
2 changes: 2 additions & 0 deletions internal/export/doc/zh/inputs/container-log.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Datakit 支持采集 Kubernetes 和主机容器日志,从数据来源上,可
"service" : "<your-service>",
"pipeline": "<your-pipeline.p>",
"remove_ansi_escape_codes": false,
"from_beginning" : false,
"tags" : {
"<some-key>" : "<some_other_value>"
}
Expand All @@ -63,6 +64,7 @@ Datakit 支持采集 Kubernetes 和主机容器日志,从数据来源上,可
| `service` | 字符串 | 日志隶属的服务,默认值为日志来源(source) |
| `pipeline` | 字符串 | 适用该日志的 Pipeline 脚本,默认值为与日志来源匹配的脚本名(`<source>.p`|
| `remove_ansi_escape_codes` | true/false | 是否删除日志数据的颜色字符 |
| `from_beginning` | true/false | 是否从文件首部采集日志 |
| `multiline_match` | 正则表达式字符串 | 用于[多行日志匹配](logging.md#multiline)时的首行识别,例如 `"multiline_match":"^\\d{4}"` 表示行首是 4 个数字,在正则表达式规则中 `\d` 是数字,前面的 `\` 是用来转义 |
| `character_encoding` | 字符串 | 选择编码,如果编码有误会导致数据无法查看,支持 `utf-8`, `utf-16le`, `utf-16be`, `gbk`, `gb18030` or ""。默认为空即可 |
| `tags` | key/value 键值对 | 添加额外的 tags,如果已经存在同名的 key 将以此为准([:octicons-tag-24: Version-1.4.6](../datakit/changelog.md#cl-1.4.6)|
Expand Down
3 changes: 3 additions & 0 deletions internal/plugins/inputs/container/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ const sampleCfg = `
## Removes ANSI escape codes from text strings.
logging_remove_ansi_escape_codes = false
  ## Whether to collect logs from the beginning of the file.
logging_file_from_beginning = false
## Search logging interval, default "60s"
#logging_search_interval = ""
Expand Down
1 change: 1 addition & 0 deletions internal/plugins/inputs/container/container_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func (c *container) tailingLogs(ins *logInstance) {
tailer.WithMaxMultilineLifeDuration(c.ipt.LoggingMaxMultilineLifeDuration),
tailer.WithRemoveAnsiEscapeCodes(cfg.RemoveAnsiEscapeCodes || c.ipt.LoggingRemoveAnsiEscapeCodes),
tailer.WithMaxForceFlushLimit(c.ipt.LoggingForceFlushLimit),
tailer.WithFromBeginning(cfg.FromBeginning || c.ipt.LoggingFileFromBeginning),
tailer.WithFileFromBeginningThresholdSize(int64(c.ipt.LoggingFileFromBeginningThresholdSize)),
tailer.WithIgnoreDeadLog(defaultActiveDuration),
tailer.WithDone(done),
Expand Down
10 changes: 10 additions & 0 deletions internal/plugins/inputs/container/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ func (ipt *Input) GetENVDoc() []*inputs.ENVInfo {
{FieldName: "LoggingAutoMultilineExtraPatterns", ENVName: "LOGGING_AUTO_MULTILINE_EXTRA_PATTERNS_JSON", ConfField: "logging_auto_multiline_extra_patterns", Type: doc.JSON, Default: `For more default rules, see [doc](logging.md#auto-multiline)`, Example: `["^\\d{4}-\\d{2}", "^[A-Za-z_]"]`, Desc: `Automatic multi-line pattern pattens list for log collection, supporting manual configuration of multiple multi-line rules`, DescZh: `日志采集的自动多行模式 pattens 列表,支持手动配置多个多行规则`},
{FieldName: "LoggingMaxMultilineLifeDuration", Type: doc.TimeDuration, Default: `3s`, Desc: `Maximum single multi-row life cycle of log collection. At the end of this cycle, existing multi-row data will be emptied and uploaded to avoid accumulation`, DescZh: `日志采集的单次多行最大生命周期,此周期结束将清空和上传现存的多行数据,避免堆积`},
{FieldName: "LoggingRemoveAnsiEscapeCodes", Type: doc.Boolean, Default: `false`, Desc: "Remove `ansi` escape codes and color characters, referred to [`ansi-decode` doc](logging.md#ansi-decode)", DescZh: `日志采集删除包含的颜色字符,详见[日志特殊字符处理说明](logging.md#ansi-decode)`},
{FieldName: "LoggingFileFromBeginningThresholdSize", Type: doc.Int, Default: `20,000,000`, Desc: "Decide whether or not to use from_beginning based on the file size: if the file size is smaller than this value when the file is found, start the collection from the beginning", DescZh: `根据文件 size 决定是否 from_beginning,如果发现该文件时,文件 size 小于这个值,就使用 from_beginning 从头部开始采集`},
{FieldName: "LoggingFileFromBeginning", Type: doc.Boolean, Default: `false`, Desc: "Whether to collect logs from the beginning of the file", DescZh: `是否从文件首部采集日志`},
{FieldName: "LoggingForceFlushLimit", Type: doc.Int, Default: `5`, Desc: `If there are consecutive N empty collections, the existing data will be uploaded to prevent memory occupation caused by accumulated`, DescZh: `日志采集上传限制,如果连续 N 次都采集为空,会将现有的数据上传,避免数据积攒占用内存`},
{FieldName: "ContainerMaxConcurrent", Type: doc.Int, Default: `cpu cores + 1`, Desc: `Maximum number of concurrency when collecting container data, recommended to be turned on only when the collection delay is large`, DescZh: `采集容器数据时的最大并发数,推荐只在采集延迟较大时开启`},
{FieldName: "DisableCollectKubeJob", Type: doc.Boolean, Default: `false`, Desc: `Turn off collection of Kubernetes Job resources (including metrics data and object data)`, DescZh: `关闭对 Kubernetes Job 资源的采集(包括指标数据和对象数据)`},
Expand Down Expand Up @@ -93,6 +95,7 @@ func (ipt *Input) GetENVDoc() []*inputs.ENVInfo {
// ENV_INPUT_CONTAINER_LOGGING_AUTO_MULTILINE_DETECTION: boolean
// ENV_INPUT_CONTAINER_LOGGING_AUTO_MULTILINE_EXTRA_PATTERNS_JSON : string (JSON string array)
// ENV_INPUT_CONTAINER_LOGGING_MAX_MULTILINE_LIFE_DURATION : string ("5s")
// ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING : boolean
// ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE : int
// ENV_INPUT_CONTAINER_LOGGING_REMOVE_ANSI_ESCAPE_CODES : boolean
// ENV_INPUT_CONTAINER_TAGS : "a=b,c=d".
Expand Down Expand Up @@ -315,6 +318,13 @@ func (ipt *Input) ReadEnv(envs map[string]string) {
ipt.LoggingMaxMultilineLifeDuration = dur
}
}
if str, ok := envs["ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING"]; ok {
if b, err := strconv.ParseBool(str); err != nil {
l.Warnf("parse ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING to bool: %s, ignore", err)
} else {
ipt.LoggingFileFromBeginning = b
}
}
if str, ok := envs["ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE"]; ok {
if size, err := strconv.ParseInt(str, 10, 64); err != nil {
l.Warnf("parse ENV_INPUT_CONTAINER_LOGGING_FILE_FROM_BEGINNING_THRESHOLD_SIZE to int64: %s, ignore", err)
Expand Down
1 change: 1 addition & 0 deletions internal/plugins/inputs/container/input.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type Input struct {
DeprecatedLoggingMinFlushInterval time.Duration `toml:"logging_min_flush_interval"`
LoggingForceFlushLimit int `toml:"logging_force_flush_limit"`
LoggingMaxMultilineLifeDuration time.Duration `toml:"logging_max_multiline_life_duration"`
LoggingFileFromBeginning bool `toml:"logging_file_from_beginning"`
LoggingFileFromBeginningThresholdSize int `toml:"logging_file_from_beginning_threshold_size"`
LoggingRemoveAnsiEscapeCodes bool `toml:"logging_remove_ansi_escape_codes"`

Expand Down
1 change: 1 addition & 0 deletions internal/plugins/inputs/container/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type logConfig struct {
Pipeline string `json:"pipeline"`
Multiline string `json:"multiline_match"`
RemoveAnsiEscapeCodes bool `json:"remove_ansi_escape_codes"`
FromBeginning bool `json:"from_beginning"`
Tags map[string]string `json:"tags"`

MultilinePatterns []string `json:"-"`
Expand Down

0 comments on commit 9f48465

Please sign in to comment.