From aac65b7295e8360d6fccc690a18e6f3a398c517f Mon Sep 17 00:00:00 2001 From: liguozhuang Date: Fri, 18 Oct 2024 18:22:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=20KubernetesPrometheus=20?= =?UTF-8?q?=E9=87=87=E9=9B=86=E6=80=A7=E8=83=BD=E5=92=8C=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E7=BB=86=E8=8A=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 15 +- go.sum | 21 +- .../inputs/kubernetesprometheus/endpoints.go | 17 +- .../kubernetesprometheus/endpoints_parser.go | 85 +-- .../endpoints_parser_test.go | 8 + .../inputs/kubernetesprometheus/instance.go | 10 + .../inputs/kubernetesprometheus/node.go | 17 +- .../kubernetesprometheus/node_parser.go | 19 +- .../inputs/kubernetesprometheus/pod.go | 17 +- .../inputs/kubernetesprometheus/pod_parser.go | 20 +- .../inputs/kubernetesprometheus/prom.go | 49 +- .../inputs/kubernetesprometheus/scrape.go | 7 +- .../inputs/kubernetesprometheus/service.go | 17 +- internal/prom/options.go | 24 +- internal/prom/prom.go | 33 +- internal/prom/util.go | 16 +- internal/prom/util_test.go | 67 ++ internal/promscrape/options.go | 113 +++ internal/promscrape/parser.go | 256 +++++++ internal/promscrape/rows.go | 676 ++++++++++++++++++ internal/promscrape/scraper.go | 154 ++++ internal/promscrape/scraper_test.go | 79 ++ .../pipeline/ptinput/ptwindow/pt_window.go | 15 +- .../cliutils/pprofparser/cfg/cfg.go | 6 + .../pprofparser/domain/events/type.go | 24 +- .../pprofparser/domain/languages/lang.go | 6 + .../pprofparser/domain/parameter/parameter.go | 7 + .../pprofparser/domain/pprof/frame.go | 5 + .../pprofparser/domain/pprof/summary.go | 6 + .../pprofparser/domain/quantity/kind.go | 6 + .../pprofparser/domain/quantity/quantity.go | 5 + .../pprofparser/domain/quantity/unit.go | 5 + .../pprofparser/domain/tracing/tracing.go | 6 + .../service/parsing/aggregators.go | 5 + .../pprofparser/service/parsing/collapse.go | 17 +- .../pprofparser/service/parsing/display.go | 5 + .../pprofparser/service/parsing/metadata.go | 5 + .../pprofparser/service/parsing/parser.go | 6 + .../pprofparser/service/parsing/pprof.go | 16 +- .../pprofparser/service/parsing/sampletype.go | 20 +- .../pprofparser/service/storage/disk.go | 5 + .../pprofparser/service/storage/oss.go | 5 + .../pprofparser/service/storage/storage.go | 6 + .../tools/filepathtoolkit/format.go | 6 + .../pprofparser/tools/filepathtoolkit/stat.go | 5 + .../pprofparser/tools/jsontoolkit/cast.go | 5 + .../pprofparser/tools/jsontoolkit/json.go | 6 + .../pprofparser/tools/logtoolkit/logger.go | 6 + .../pprofparser/tools/mathtoolkit/math.go | 6 + .../pprofparser/tools/parsetoolkit/pprof.go | 6 + .../klauspost/compress/.goreleaser.yml | 22 +- .../github.com/klauspost/compress/README.md | 60 +- .../klauspost/compress/flate/deflate.go | 31 +- .../klauspost/compress/flate/fast_encoder.go | 23 - .../klauspost/compress/flate/inflate.go | 66 +- .../klauspost/compress/flate/inflate_gen.go | 34 +- .../klauspost/compress/flate/level5.go | 398 +++++++++++ .../compress/flate/matchlen_amd64.go | 16 + .../klauspost/compress/flate/matchlen_amd64.s | 66 ++ .../compress/flate/matchlen_generic.go | 33 + .../klauspost/compress/fse/bitwriter.go | 3 +- .../klauspost/compress/fse/compress.go | 5 +- .../klauspost/compress/gzip/gunzip.go | 6 + .../klauspost/compress/gzip/gzip.go | 21 + .../klauspost/compress/huff0/bitwriter.go | 3 +- .../klauspost/compress/huff0/bytereader.go | 44 -- .../klauspost/compress/huff0/compress.go | 25 +- .../klauspost/compress/huff0/huff0.go | 4 +- 
.../compress/internal/snapref/encode_other.go | 2 +- vendor/github.com/klauspost/compress/s2sx.mod | 2 +- .../klauspost/compress/zlib/reader.go | 32 +- .../klauspost/compress/zlib/writer.go | 18 +- .../klauspost/compress/zstd/README.md | 2 +- .../klauspost/compress/zstd/bitreader.go | 34 +- .../klauspost/compress/zstd/bitwriter.go | 3 +- .../klauspost/compress/zstd/blockdec.go | 3 + .../klauspost/compress/zstd/blockenc.go | 49 +- .../klauspost/compress/zstd/decodeheader.go | 56 +- .../klauspost/compress/zstd/decoder.go | 2 +- .../klauspost/compress/zstd/dict.go | 410 ++++++++++- .../klauspost/compress/zstd/enc_best.go | 94 ++- .../klauspost/compress/zstd/enc_better.go | 30 +- .../klauspost/compress/zstd/encoder.go | 13 +- .../compress/zstd/encoder_options.go | 6 +- .../klauspost/compress/zstd/frameenc.go | 6 +- .../compress/zstd/fse_decoder_generic.go | 11 +- .../zstd/internal/xxhash/xxhash_arm64.s | 4 +- .../klauspost/compress/zstd/matchlen_amd64.s | 10 +- .../klauspost/compress/zstd/seqdec.go | 17 +- .../klauspost/compress/zstd/seqdec_amd64.s | 264 ++++--- .../klauspost/compress/zstd/seqdec_generic.go | 2 +- .../klauspost/compress/zstd/snappy.go | 5 +- .../github.com/mattn/go-runewidth/.travis.yml | 16 - .../github.com/mattn/go-runewidth/README.md | 2 +- .../github.com/mattn/go-runewidth/go.test.sh | 12 - .../mattn/go-runewidth/runewidth.go | 93 ++- .../mattn/go-runewidth/runewidth_appengine.go | 1 + .../mattn/go-runewidth/runewidth_js.go | 4 +- .../mattn/go-runewidth/runewidth_posix.go | 5 +- .../mattn/go-runewidth/runewidth_windows.go | 4 +- vendor/github.com/rivo/uniseg/README.md | 23 +- vendor/github.com/rivo/uniseg/doc.go | 77 +- .../github.com/rivo/uniseg/eastasianwidth.go | 5 +- .../rivo/uniseg/emojipresentation.go | 285 ++++++++ .../github.com/rivo/uniseg/gen_properties.go | 120 ++-- vendor/github.com/rivo/uniseg/grapheme.go | 167 +++-- .../rivo/uniseg/graphemeproperties.go | 2 +- .../github.com/rivo/uniseg/graphemerules.go | 37 +- vendor/github.com/rivo/uniseg/line.go | 24 +- .../github.com/rivo/uniseg/lineproperties.go | 5 +- vendor/github.com/rivo/uniseg/properties.go | 13 +- vendor/github.com/rivo/uniseg/sentence.go | 2 +- .../rivo/uniseg/sentenceproperties.go | 5 +- vendor/github.com/rivo/uniseg/step.go | 117 ++- vendor/github.com/rivo/uniseg/width.go | 54 ++ vendor/github.com/rivo/uniseg/word.go | 2 +- .../github.com/rivo/uniseg/wordproperties.go | 4 +- vendor/github.com/valyala/fastjson/LICENSE | 22 + .../valyala/fastjson/fastfloat/parse.go | 499 +++++++++++++ vendor/modules.txt | 15 +- 120 files changed, 4609 insertions(+), 882 deletions(-) create mode 100644 internal/promscrape/options.go create mode 100644 internal/promscrape/parser.go create mode 100644 internal/promscrape/rows.go create mode 100644 internal/promscrape/scraper.go create mode 100644 internal/promscrape/scraper_test.go create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_amd64.go create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_amd64.s create mode 100644 vendor/github.com/klauspost/compress/flate/matchlen_generic.go delete mode 100644 vendor/github.com/klauspost/compress/huff0/bytereader.go delete mode 100644 vendor/github.com/mattn/go-runewidth/.travis.yml delete mode 100644 vendor/github.com/mattn/go-runewidth/go.test.sh create mode 100644 vendor/github.com/rivo/uniseg/emojipresentation.go create mode 100644 vendor/github.com/rivo/uniseg/width.go create mode 100644 vendor/github.com/valyala/fastjson/LICENSE create mode 100644 
vendor/github.com/valyala/fastjson/fastfloat/parse.go diff --git a/go.mod b/go.mod index e97a66d5a5..433ecd1f89 100644 --- a/go.mod +++ b/go.mod @@ -56,7 +56,7 @@ require ( github.com/itchyny/timefmt-go v0.1.5 // indirect github.com/jessevdk/go-flags v1.5.0 github.com/kardianos/service v1.2.1 - github.com/klauspost/compress v1.16.7 // indirect + github.com/klauspost/compress v1.17.9 // indirect github.com/mssola/user_agent v0.6.0 // indirect github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/openzipkin/zipkin-go v0.2.2 @@ -104,7 +104,6 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/DataDog/datadog-go/v5 v5.1.1 // indirect github.com/DataDog/sketches-go v1.4.1 // indirect - github.com/GuanceCloud/kubernetes v0.0.0-20230801080916-ca299820872b github.com/GuanceCloud/mdcheck v0.0.0-20230718065937-44c6728c995f github.com/GuanceCloud/toml v1.2.5 github.com/GuanceCloud/tracing-protos v0.0.0-20230619071516-54c8cff1b6b3 @@ -235,7 +234,7 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect - github.com/mattn/go-runewidth v0.0.13 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect github.com/mattn/go-tty v0.0.3 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 github.com/mitchellh/copystructure v1.2.0 // indirect @@ -272,7 +271,7 @@ require ( github.com/pyroscope-io/jfr-parser v0.5.2 // indirect github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - github.com/rivo/uniseg v0.3.1 // indirect + github.com/rivo/uniseg v0.4.3 // indirect github.com/rs/xid v1.6.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 @@ -313,7 +312,7 @@ require ( golang.org/x/sync v0.4.0 // indirect golang.org/x/time v0.6.0 golang.org/x/tools v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect + golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect google.golang.org/protobuf v1.34.2 @@ -349,7 +348,9 @@ require ( require ( github.com/DataDog/ebpf-manager v0.2.16 - github.com/GuanceCloud/cliutils v1.1.21 + github.com/GuanceCloud/cliutils v1.1.22-0.20240930074036-255c78c086fd + github.com/GuanceCloud/kubernetes v0.0.0-20230801080916-ca299820872b + github.com/GuanceCloud/zipstream v0.1.0 // indirect github.com/andrewkroh/sys v0.0.0-20151128191922-287798fe3e43 github.com/brianvoe/gofakeit/v6 v6.28.0 github.com/cilium/ebpf v0.11.0 @@ -374,7 +375,6 @@ require ( ) require ( - github.com/GuanceCloud/zipstream v0.1.0 // indirect github.com/VictoriaMetrics/easyproto v0.1.4 // indirect github.com/avast/retry-go/v4 v4.1.0 // indirect github.com/avvmoto/buf-readerat v0.0.0-20171115124131-a17c8cb89270 // indirect @@ -387,6 +387,7 @@ require ( github.com/outcaste-io/ristretto v0.2.1 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4 // indirect + github.com/valyala/fastjson v1.6.3 ) replace ( diff --git a/go.sum b/go.sum index de9ea1a8d5..71c0823111 100644 --- a/go.sum +++ b/go.sum @@ -143,8 +143,8 @@ github.com/DataDog/sketches-go v1.4.1 h1:j5G6as+9FASM2qC36lvpvQAj9qsv/jUs3FtO8Cw github.com/DataDog/sketches-go 
v1.4.1/go.mod h1:xJIXldczJyyjnbDop7ZZcLxJdV3+7Kra7H1KMgpgkLk= github.com/GuanceCloud/client_model v0.0.0-20230418154757-93bd4e878a5e h1:i34dA4kiRTfG+KdvkIXCLPDduarVeFlQhGDD3TefgS4= github.com/GuanceCloud/client_model v0.0.0-20230418154757-93bd4e878a5e/go.mod h1:PMnE48aPzuRu83FmWZugC0O3d54ZupJd/MmiaYxz8sM= -github.com/GuanceCloud/cliutils v1.1.21 h1:UkENug9Kg4GVTq1ITWIz2KmIPIvpNrZxKKUmRxWWFfA= -github.com/GuanceCloud/cliutils v1.1.21/go.mod h1:5bIAZ9yA6l7W8MMUKw0+SIZJRpmEwxM6ZYLy4vweTgU= +github.com/GuanceCloud/cliutils v1.1.22-0.20240930074036-255c78c086fd h1:KxbB1a1NybivPLnI+xVcR0WPPXlI1+jCyCmPMJ5LnpE= +github.com/GuanceCloud/cliutils v1.1.22-0.20240930074036-255c78c086fd/go.mod h1:5bIAZ9yA6l7W8MMUKw0+SIZJRpmEwxM6ZYLy4vweTgU= github.com/GuanceCloud/confd v0.1.101 h1:yjHgfl6YzAlTbFOFMTE4ERpFJzIyovOW7ZFc2/ZssL0= github.com/GuanceCloud/confd v0.1.101/go.mod h1:o0opIwOX+yNwV9nh56x5ymFMJ+YBD8JuPxBJ7a1mEmo= github.com/GuanceCloud/dockertest/v3 v3.9.4 h1:ScSNhfA2HSNLfrYoNd1KSRxkrymlKiBE60g4f6eUoOk= @@ -1578,8 +1578,8 @@ github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8 github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -1696,8 +1696,9 @@ github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzp github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.6/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.11.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= @@ -2062,8 +2063,8 @@ github.com/retailnext/hllpp v1.0.1-0.20180308014038-101a6d2f8b52/go.mod h1:RDpi1 github.com/rivo/tview v0.0.0-20220129131435-1f7581b67bd1 h1:rZQHaUDlzupPiNSXPKG+NwRvSm5I6gVS96xTOBoseWc= github.com/rivo/tview v0.0.0-20220129131435-1f7581b67bd1/go.mod h1:WIfMkQNY+oq/mWwtsjOYHIZBuwthioY2srOmljJkTnk= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.3.1 h1:SDPP7SHNl1L7KrEFCSJslJ/DM9DT02Nq2C61XrfHMmk= -github.com/rivo/uniseg 
v0.3.1/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw= +github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/robertkrimen/godocdown v0.0.0-20130622164427-0bfa04905481/go.mod h1:C9WhFzY47SzYBIvzFqSvHIR6ROgDo4TtdTuRaOMjF/s= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= @@ -2309,6 +2310,8 @@ github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKn github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.29.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= +github.com/valyala/fastjson v1.6.3 h1:tAKFnnwmeMGPbwJ7IwxcTPCNr3uIzoIj3/Fh90ra4xc= +github.com/valyala/fastjson v1.6.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= @@ -3067,8 +3070,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= +golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.zx2c4.com/wireguard v0.0.20200121/go.mod h1:P2HsVp8SKwZEufsnezXZA4GRX/T49/HlU7DGuelXsU4= golang.zx2c4.com/wireguard/wgctrl v0.0.0-20200205215550-e35592f146e4/go.mod h1:UdS9frhv65KTfwxME1xE8+rHYoFpbm36gOud1GhBe9c= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= diff --git a/internal/plugins/inputs/kubernetesprometheus/endpoints.go b/internal/plugins/inputs/kubernetesprometheus/endpoints.go index 82232d0702..f09f179f40 100644 --- a/internal/plugins/inputs/kubernetesprometheus/endpoints.go +++ b/internal/plugins/inputs/kubernetesprometheus/endpoints.go @@ -12,7 +12,7 @@ import ( "strings" dkio "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/io" - iprom "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/prom" + "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/informers" infov1 "k8s.io/client-go/informers/core/v1" @@ -106,22 +106,21 @@ func (e *Endpoints) process(ctx context.Context) bool { return true } - if e.scrape.matchesKey(key, endpointsFeature(ep)) { + if shouldSkipEndpoints(ep) { return true } - klog.Infof("found endpoints %s", key) + if e.scrape.matchesKey(key, endpointsFeature(ep)) { + return true + } + klog.Infof("found new endpoints %s", key) 
e.terminateScrape(key) e.startScrape(ctx, key, ep) return true } func (e *Endpoints) startScrape(ctx context.Context, key string, item *corev1.Endpoints) { - if shouldSkipEndpoints(item) { - return - } - nodeName, nodeNameExists := nodeNameFrom(ctx) feature := endpointsFeature(item) @@ -152,8 +151,8 @@ func (e *Endpoints) startScrape(ctx context.Context, key string, item *corev1.En opts := buildPromOptions( RoleEndpoints, key, e.feeder, - iprom.WithMeasurementName(cfg.measurement), - iprom.WithTags(cfg.tags)) + promscrape.WithMeasurement(cfg.measurement), + promscrape.WithExtraTags(cfg.tags)) if tlsOpts, err := buildPromOptionsWithAuth(&ins.Auth); err != nil { klog.Warnf("endpoints %s has unexpected tls config %s", key, err) diff --git a/internal/plugins/inputs/kubernetesprometheus/endpoints_parser.go b/internal/plugins/inputs/kubernetesprometheus/endpoints_parser.go index 6753deda61..eaa414672e 100644 --- a/internal/plugins/inputs/kubernetesprometheus/endpoints_parser.go +++ b/internal/plugins/inputs/kubernetesprometheus/endpoints_parser.go @@ -6,7 +6,6 @@ package kubernetesprometheus import ( - "fmt" "regexp" "strconv" @@ -141,6 +140,19 @@ var ( return "" }, }, + { + // e.g. integer "8080" + key: newKeyMatcherWithRegexp(regexp.MustCompile(`^(\d*)$`)), + fn: func(item *corev1.EndpointPort, args []string) string { + if len(args) != 1 { + return "" + } + if strconv.Itoa(int(item.Port)) == args[0] { + return args[0] + } + return "" + }, + }, } ) @@ -168,60 +180,49 @@ func (p *endpointsParser) parsePromConfig(ins *Instance) ([]*basePromConfig, err var configs []*basePromConfig for _, set := range p.item.Subsets { - for addressIdx, address := range set.Addresses { - // length 5 - oldElems := []string{ins.Scheme, ins.Address, ins.Port, ins.Path, ins.Measurement} - newElems := deepCopySlice(oldElems) - - tagKeys := []string{} - for k, v := range ins.Tags { - tagKeys = append(tagKeys, k) - newElems = append(newElems, v) - } + port := ins.Port + if matched, res := p.matchPort(set.Ports, port); matched && res != "" { + port = res + } else { + // not found port + continue + } - for idx, elem := range newElems { - if matched, res := p.matchEndpoints(elem); matched && res != "" { - newElems[idx] = res - continue - } - if matched, res := p.matchAddress(&set.Addresses[addressIdx], elem); matched && res != "" { - newElems[idx] = res + for addressIdx, address := range set.Addresses { + elems := []string{ins.Scheme, ins.Address, ins.Path, ins.Measurement} + for idx := range elems { + if matched, res := p.matchEndpoints(elems[idx]); matched && res != "" { + elems[idx] = res continue } - if matched, res := p.matchPort(set.Ports, elem); matched && res != "" { - newElems[idx] = res - continue + if matched, res := p.matchAddress(&set.Addresses[addressIdx], elems[idx]); matched && res != "" { + elems[idx] = res } - newElems[idx] = elem } - u, err := buildURLWithParams(newElems[0], newElems[1], newElems[2], newElems[3], ins.Params) + u, err := buildURLWithParams(elems[0], elems[1], port, elems[2], ins.Params) if err != nil { return nil, err } - measurement := newElems[4] + measurement := elems[3] tags := map[string]string{} - - if len(tagKeys)+len(oldElems) != len(newElems) { - return nil, fmt.Errorf("unexpected tags length %d-%d", len(tagKeys), len(newElems)-len(oldElems)) - } - - for idx, k := range tagKeys { - tags[k] = newElems[idx+len(oldElems)] - } - - for k, v := range tags { - switch v { - case MateInstanceTag: - tags[k] = u.Host - case MateHostTag: - if host := splitHost(u.Host); host != "" { - tags[k] = 
host + for k, v := range ins.Tags { + if matched, res := matchInstanceOrHost(v, u.Host); matched { + if res != "" { + tags[k] = res } - default: - // nil + continue + } + if matched, res := p.matchEndpoints(v); matched && res != "" { + tags[k] = res + continue + } + if matched, res := p.matchAddress(&set.Addresses[addressIdx], v); matched && res != "" { + tags[k] = res + continue } + tags[k] = v } nodeName := "" diff --git a/internal/plugins/inputs/kubernetesprometheus/endpoints_parser_test.go b/internal/plugins/inputs/kubernetesprometheus/endpoints_parser_test.go index c302e82eaf..e6d8ac41a1 100644 --- a/internal/plugins/inputs/kubernetesprometheus/endpoints_parser_test.go +++ b/internal/plugins/inputs/kubernetesprometheus/endpoints_parser_test.go @@ -132,6 +132,14 @@ func TestEndpointsMeta(t *testing.T) { matched, res = pr.matchPort(obj, "__kubernetes_endpoints_port_nonexistent_number") assert.Equal(t, false, matched) assert.Equal(t, "", res) + + matched, res = pr.matchPort(obj, "9090") + assert.Equal(t, true, matched) + assert.Equal(t, "9090", res) + + matched, res = pr.matchPort(obj, "19090") + assert.Equal(t, false, matched) + assert.Equal(t, "", res) }) t.Run("endpoints-scrape", func(t *testing.T) { diff --git a/internal/plugins/inputs/kubernetesprometheus/instance.go b/internal/plugins/inputs/kubernetesprometheus/instance.go index 78afe5947c..9ab4b3f8a6 100644 --- a/internal/plugins/inputs/kubernetesprometheus/instance.go +++ b/internal/plugins/inputs/kubernetesprometheus/instance.go @@ -259,3 +259,13 @@ func checkPaused(ctx context.Context, election bool) bool { paused, exists := pauseFrom(ctx) return exists && paused } + +func matchInstanceOrHost(str, host string) (bool, string) { + switch str { + case MateInstanceTag: + return true, host + case MateHostTag: + return true, splitHost(host) + } + return false, str +} diff --git a/internal/plugins/inputs/kubernetesprometheus/node.go b/internal/plugins/inputs/kubernetesprometheus/node.go index 101d7b76f7..dcfcce91cd 100644 --- a/internal/plugins/inputs/kubernetesprometheus/node.go +++ b/internal/plugins/inputs/kubernetesprometheus/node.go @@ -10,7 +10,7 @@ import ( "fmt" dkio "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/io" - iprom "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/prom" + "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/informers" infov1 "k8s.io/client-go/informers/core/v1" @@ -109,22 +109,21 @@ func (n *Node) process(ctx context.Context) bool { return true } - if n.scrape.matchesKey(key, nodeFeature(node)) { + if shouldSkipNode(node) { return true } - klog.Infof("found node %s", key) + if n.scrape.matchesKey(key, nodeFeature(node)) { + return true + } + klog.Infof("found new node %s", key) n.terminateScrape(key) n.startScrape(ctx, key, node) return true } func (n *Node) startScrape(ctx context.Context, key string, item *corev1.Node) { - if shouldSkipNode(item) { - return - } - feature := nodeFeature(item) checkPausedFunc := func() bool { return checkPaused(ctx, false /* not use election */) @@ -154,8 +153,8 @@ func (n *Node) startScrape(ctx context.Context, key string, item *corev1.Node) { opts := buildPromOptions( RoleNode, key, n.feeder, - iprom.WithMeasurementName(cfg.measurement), - iprom.WithTags(cfg.tags)) + promscrape.WithMeasurement(cfg.measurement), + promscrape.WithExtraTags(cfg.tags)) if tlsOpts, err := buildPromOptionsWithAuth(&ins.Auth); err != nil { klog.Warnf("node %s has unexpected tls config %ss", key, err) diff --git 
a/internal/plugins/inputs/kubernetesprometheus/node_parser.go b/internal/plugins/inputs/kubernetesprometheus/node_parser.go index 3bb05cef24..49dbe3684c 100644 --- a/internal/plugins/inputs/kubernetesprometheus/node_parser.go +++ b/internal/plugins/inputs/kubernetesprometheus/node_parser.go @@ -119,20 +119,17 @@ func (p *nodeParser) parsePromConfig(ins *Instance) (*basePromConfig, error) { tags := map[string]string{} for k, v := range ins.Tags { - switch v { - case MateInstanceTag: - tags[k] = u.Host - case MateHostTag: - if host := splitHost(u.Host); host != "" { - tags[k] = host - } - default: - if matched, res := p.matches(v); matched && res != "" { + if matched, res := matchInstanceOrHost(v, u.Host); matched { + if res != "" { tags[k] = res - } else { - tags[k] = v } + continue + } + if matched, res := p.matches(v); matched && res != "" { + tags[k] = res + continue } + tags[k] = v } measurement := ins.Measurement diff --git a/internal/plugins/inputs/kubernetesprometheus/pod.go b/internal/plugins/inputs/kubernetesprometheus/pod.go index b0988ea44e..fb5f2271a2 100644 --- a/internal/plugins/inputs/kubernetesprometheus/pod.go +++ b/internal/plugins/inputs/kubernetesprometheus/pod.go @@ -10,7 +10,7 @@ import ( "fmt" dkio "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/io" - iprom "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/prom" + "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape" corev1 "k8s.io/api/core/v1" "k8s.io/client-go/informers" infov1 "k8s.io/client-go/informers/core/v1" @@ -109,22 +109,21 @@ func (p *Pod) process(ctx context.Context) bool { return true } - if p.scrape.matchesKey(key, podFeature(pod)) { + if shouldSkipPod(pod) { return true } - klog.Infof("found pod %s", key) + if p.scrape.matchesKey(key, podFeature(pod)) { + return true + } + klog.Infof("found new pod %s", key) p.terminateScrape(key) p.startScrape(ctx, key, pod) return true } func (p *Pod) startScrape(ctx context.Context, key string, item *corev1.Pod) { - if shouldSkipPod(item) { - return - } - feature := podFeature(item) checkPausedFunc := func() bool { return checkPaused(ctx, false /* not use election */) @@ -154,8 +153,8 @@ func (p *Pod) startScrape(ctx context.Context, key string, item *corev1.Pod) { opts := buildPromOptions( RolePod, key, p.feeder, - iprom.WithMeasurementName(cfg.measurement), - iprom.WithTags(cfg.tags)) + promscrape.WithMeasurement(cfg.measurement), + promscrape.WithExtraTags(cfg.tags)) if tlsOpts, err := buildPromOptionsWithAuth(&ins.Auth); err != nil { klog.Warnf("pod %s has unexpected tls config %s", key, err) diff --git a/internal/plugins/inputs/kubernetesprometheus/pod_parser.go b/internal/plugins/inputs/kubernetesprometheus/pod_parser.go index 04b790e65b..86dbf04e00 100644 --- a/internal/plugins/inputs/kubernetesprometheus/pod_parser.go +++ b/internal/plugins/inputs/kubernetesprometheus/pod_parser.go @@ -114,22 +114,18 @@ func (p *podParser) parsePromConfig(ins *Instance) (*basePromConfig, error) { } tags := map[string]string{} - for k, v := range ins.Tags { - switch v { - case MateInstanceTag: - tags[k] = u.Host - case MateHostTag: - if host := splitHost(u.Host); host != "" { - tags[k] = host - } - default: - if matched, res := p.matches(v); matched && res != "" { + if matched, res := matchInstanceOrHost(v, u.Host); matched { + if res != "" { tags[k] = res - } else { - tags[k] = v } + continue + } + if matched, res := p.matches(v); matched && res != "" { + tags[k] = res + continue } + tags[k] = v } measurement := ins.Measurement diff --git 
a/internal/plugins/inputs/kubernetesprometheus/prom.go b/internal/plugins/inputs/kubernetesprometheus/prom.go index cd06a9ba54..1563b6fcdf 100644 --- a/internal/plugins/inputs/kubernetesprometheus/prom.go +++ b/internal/plugins/inputs/kubernetesprometheus/prom.go @@ -6,19 +6,21 @@ package kubernetesprometheus import ( + "fmt" + "net/url" "os" "sync/atomic" "time" "github.com/GuanceCloud/cliutils/point" dkio "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/io" - iprom "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/prom" + "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape" ) type promScraper struct { - role, key string - urlstr string - pm *iprom.Prom + role, key string + urlstr, remote string + pm *promscrape.PromScraper checkPaused func() bool terminated atomic.Bool @@ -33,7 +35,7 @@ func newPromScraper( urlstr string, interval time.Duration, checkPaused func() bool, - opts []iprom.PromOption, + opts []promscrape.Option, ) (*promScraper, error) { var err error p := promScraper{ @@ -44,7 +46,12 @@ func newPromScraper( interval: interval, } - p.pm, err = iprom.NewProm(opts...) + u, err := url.Parse(urlstr) + if err == nil { + p.remote = fmt.Sprintf(":%s%s", u.Port(), u.Path) + } + + p.pm, err = promscrape.NewPromScraper(opts...) if err != nil { return nil, err } @@ -73,12 +80,12 @@ func (p *promScraper) shouldScrape() bool { func (p *promScraper) scrape() error { p.lastTime = time.Now() - _, err := p.pm.CollectFromHTTPV2(p.urlstr) - scrapeTargetCost.WithLabelValues(p.role, p.key, p.urlstr).Observe(float64(time.Since(p.lastTime)) / float64(time.Second)) + err := p.pm.ScrapeURL(p.urlstr) + scrapeTargetCost.WithLabelValues(p.role, p.key, p.remote).Observe(float64(time.Since(p.lastTime)) / float64(time.Second)) return err } -func buildPromOptions(role Role, key string, feeder dkio.Feeder, opts ...iprom.PromOption) []iprom.PromOption { +func buildPromOptions(role Role, key string, feeder dkio.Feeder, opts ...promscrape.Option) []promscrape.Option { name := string(role) + "/" + key callbackFn := func(pts []*point.Point) error { @@ -100,34 +107,34 @@ func buildPromOptions(role Role, key string, feeder dkio.Feeder, opts ...iprom.P return nil } - res := []iprom.PromOption{ - iprom.WithLogger(klog), // WithLogger must in the first - iprom.WithSource(name), - iprom.WithMaxBatchCallback(1, callbackFn), + res := []promscrape.Option{ + // promscrape.WithLogger(klog), // WithLogger must in the first + promscrape.WithSource(name), + promscrape.WithCallback(callbackFn), } res = append(res, opts...) 
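
For illustration, a minimal sketch of how the promscrape options assembled here compose on their own. Only NewPromScraper, ScrapeURL and the With* options visible in this patch are assumed; the target URL, tag values and measurement name are invented:

    package main

    import (
        "fmt"
        "time"

        "github.com/GuanceCloud/cliutils/point"
        "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape"
    )

    func main() {
        ps, err := promscrape.NewPromScraper(
            promscrape.WithSource("demo"),
            promscrape.WithMeasurement("demo_metrics"),
            promscrape.WithExtraTags(map[string]string{"cluster": "dev"}), // added to every point
            promscrape.WithTimeout(10*time.Second),
            // Without WithCallback the default callback discards all points.
            promscrape.WithCallback(func(pts []*point.Point) error {
                fmt.Printf("received %d points\n", len(pts))
                return nil
            }),
        )
        if err != nil {
            panic(err)
        }
        // Scrape one Prometheus exposition endpoint (illustrative URL).
        if err := ps.ScrapeURL("http://127.0.0.1:9100/metrics"); err != nil {
            fmt.Println("scrape failed:", err)
        }
    }
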
return res } -func buildPromOptionsWithAuth(auth *Auth) ([]iprom.PromOption, error) { - var opts []iprom.PromOption +func buildPromOptionsWithAuth(auth *Auth) ([]promscrape.Option, error) { + var opts []promscrape.Option if auth.BearerTokenFile != "" { token, err := os.ReadFile(auth.BearerTokenFile) if err != nil { return nil, err } - opts = append(opts, iprom.WithBearerToken(string(token))) + opts = append(opts, promscrape.WithBearerToken(string(token))) } if auth.TLSConfig != nil { opts = append( opts, - iprom.WithTLSOpen(true), - iprom.WithCacertFiles(auth.TLSConfig.CaCerts), - iprom.WithCertFile(auth.TLSConfig.Cert), - iprom.WithKeyFile(auth.TLSConfig.CertKey), - iprom.WithInsecureSkipVerify(auth.TLSConfig.InsecureSkipVerify), + promscrape.WithTLSOpen(true), + promscrape.WithCacertFiles(auth.TLSConfig.CaCerts), + promscrape.WithCertFile(auth.TLSConfig.Cert), + promscrape.WithKeyFile(auth.TLSConfig.CertKey), + promscrape.WithInsecureSkipVerify(auth.TLSConfig.InsecureSkipVerify), ) } diff --git a/internal/plugins/inputs/kubernetesprometheus/scrape.go b/internal/plugins/inputs/kubernetesprometheus/scrape.go index 374f041f3f..1cc79c4758 100644 --- a/internal/plugins/inputs/kubernetesprometheus/scrape.go +++ b/internal/plugins/inputs/kubernetesprometheus/scrape.go @@ -118,7 +118,11 @@ func (s *scrapeManager) doWork(ctx context.Context, name string) { if !ok { return } - tasks = append(tasks, sp) + if len(tasks) >= 100 { + klog.Warnf("%s scrape is over limit", s.role) + } else { + tasks = append(tasks, sp) + } case <-tick.C: // next @@ -133,6 +137,7 @@ func (s *scrapeManager) doWork(ctx context.Context, name string) { if task.shouldScrape() { if err := task.scrape(); err != nil { klog.Warnf("failed to scrape url %s, err %s", task.targetURL(), err) + removeIndex = append(removeIndex, idx) } } } diff --git a/internal/plugins/inputs/kubernetesprometheus/service.go b/internal/plugins/inputs/kubernetesprometheus/service.go index 8a4a84c3fb..d7a46b9fe8 100644 --- a/internal/plugins/inputs/kubernetesprometheus/service.go +++ b/internal/plugins/inputs/kubernetesprometheus/service.go @@ -11,7 +11,7 @@ import ( "time" dkio "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/io" - iprom "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/prom" + "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" @@ -116,22 +116,21 @@ func (s *Service) process(ctx context.Context) bool { return true } - if feature, ok := s.svcList[key]; ok && feature == serviceFeature(svc) { + if shouldSkipService(svc) { return true } - klog.Infof("found service %s", key) + if feature, ok := s.svcList[key]; ok && feature == serviceFeature(svc) { + return true + } + klog.Infof("found new service %s", key) s.terminateScrape(key) s.startScrape(ctx, key, svc) return true } func (s *Service) startScrape(ctx context.Context, key string, item *corev1.Service) { - if shouldSkipService(item) { - return - } - svcFeature := serviceFeature(item) for _, ins := range s.instances { @@ -205,8 +204,8 @@ func (s *Service) tryCreateScrapeForEndpoints(ctx context.Context, namespace, na opts := buildPromOptions( RoleService, key, s.feeder, - iprom.WithMeasurementName(cfg.measurement), - iprom.WithTags(cfg.tags)) + promscrape.WithMeasurement(cfg.measurement), + promscrape.WithExtraTags(cfg.tags)) if tlsOpts, err := buildPromOptionsWithAuth(&endpointsInstance.Auth); err != nil { klog.Warnf("svc-ep %s has unexpected tls config %s", key, 
err) diff --git a/internal/prom/options.go b/internal/prom/options.go index bdcf8ad378..baa9ebe7c7 100644 --- a/internal/prom/options.go +++ b/internal/prom/options.go @@ -51,24 +51,34 @@ type option struct { batchCallback func([]*point.Point) error streamSize int - l *logger.Logger + + l *logger.Logger } type PromOption func(opt *option) +var minimumHTTPTimeout = time.Second * 3 + +func defaultOption() *option { + return &option{ + l: logger.DefaultSLogger("prom"), + timeout: minimumHTTPTimeout, + } +} + func WithSource(str string) PromOption { return func(opt *option) { opt.source = str } } -func WithTimeout(dura time.Duration) PromOption { +func WithTimeout(dur time.Duration) PromOption { return func(opt *option) { - if dura > 0 { - opt.timeout = dura + if minimumHTTPTimeout < dur { + opt.timeout = dur } } } -func WithKeepAlive(dura time.Duration) PromOption { +func WithKeepAlive(dur time.Duration) PromOption { return func(opt *option) { - if dura > 0 { - opt.keepAlive = dura + if dur > 0 { + opt.keepAlive = dur } } } diff --git a/internal/prom/prom.go b/internal/prom/prom.go index 87a29979b1..56318e11b0 100644 --- a/internal/prom/prom.go +++ b/internal/prom/prom.go @@ -13,6 +13,7 @@ import ( "io" "net" "net/http" + "net/http/httptrace" "net/url" "os" "path" @@ -20,7 +21,6 @@ import ( "regexp" "time" - "github.com/GuanceCloud/cliutils/logger" "github.com/GuanceCloud/cliutils/point" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" @@ -58,8 +58,6 @@ func (opt *option) GetSource(defaultSource ...string) string { return "prom" //nolint:goconst } -const httpTimeout = time.Second * 3 - type Prom struct { opt *option client *http.Client @@ -68,23 +66,13 @@ type Prom struct { ptCount int } -func NewProm(promOpts ...PromOption) (*Prom, error) { - opt := option{} - for idx := range promOpts { - if promOpts[idx] != nil { - promOpts[idx](&opt) - } +func NewProm(opts ...PromOption) (*Prom, error) { + opt := defaultOption() + for _, fn := range opts { + fn(opt) } - if opt.l == nil { - opt.l = logger.DefaultSLogger("prom") - } - - if opt.timeout < httpTimeout { - opt.timeout = httpTimeout - } - - p := Prom{opt: &opt, InfoTags: make(map[string]string)} + p := Prom{opt: opt, InfoTags: make(map[string]string)} var f expfmt.BatchCallback = func(mf map[string]*dto.MetricFamily) error { pts, err := p.MetricFamilies2points(mf, "") @@ -102,8 +90,10 @@ func NewProm(promOpts ...PromOption) (*Prom, error) { cliopts := httpcli.NewOptions() cliopts.DialTimeout = opt.timeout cliopts.DialKeepAlive = opt.keepAlive + cliopts.MaxIdleConns = 1 + cliopts.MaxIdleConnsPerHost = 1 - if tlsConfig, err := loadTLSConfig(&opt); err != nil { + if tlsConfig, err := loadTLSConfig(opt); err != nil { return nil, fmt.Errorf("could not load tlsConfig %w", err) } else if tlsConfig != nil { cliopts.TLSClientConfig = tlsConfig @@ -160,6 +150,11 @@ func (p *Prom) Request(url string) (*http.Response, error) { return nil, err } + // trace + s := httpcli.NewHTTPClientTraceStat("prom/" + p.opt.source) + defer s.Metrics() + req = req.WithContext(httptrace.WithClientTrace(req.Context(), s.Trace())) + r, err := p.client.Do(req) if err != nil { return nil, err diff --git a/internal/prom/util.go b/internal/prom/util.go index 5d9df9cb45..70831a4f3d 100644 --- a/internal/prom/util.go +++ b/internal/prom/util.go @@ -418,6 +418,14 @@ func (p *Prom) MetricFamilies2points(metricFamilies map[string]*dto.MetricFamily return pts, nil } +func (p *Prom) getMode() string { + if p.opt.streamSize > 0 { + return "stream" + } else 
{ + return "no_stream" + } +} + func getValue(m *dto.Metric, metricType dto.MetricType) float64 { switch metricType { //nolint:exhaustive case dto.MetricType_GAUGE: @@ -438,11 +446,3 @@ func getTimestampS(m *dto.Metric, startTime time.Time) time.Time { } return startTime } - -func (p *Prom) getMode() string { - if p.opt.streamSize > 0 { - return "stream" - } else { - return "no_stream" - } -} diff --git a/internal/prom/util_test.go b/internal/prom/util_test.go index 17769077fd..07f43df854 100644 --- a/internal/prom/util_test.go +++ b/internal/prom/util_test.go @@ -6,11 +6,78 @@ package prom import ( + "bytes" + "fmt" "testing" + "github.com/GuanceCloud/cliutils/point" "github.com/stretchr/testify/assert" ) +const ( + mockHeader = ` +# HELP datakit_http_worker_number The number of the worker +# TYPE datakit_http_worker_number gauge +` + mockBody = ` +datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d"} 11.0 +datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d"} 12.2 +datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d"} 13.0 +datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d"} 14.2 +datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d"} 15.0 +` +) + +func TestParseMetrics(t *testing.T) { + var buf bytes.Buffer + buf.WriteString(mockHeader) + for i := 0; i < 10000; i++ { + buf.WriteString(fmt.Sprintf(mockBody, i, i, i, i, i)) + } + + count := 0 + + opts := []PromOption{ + WithMeasurementName("testing-meas"), + WithTags(map[string]string{"key-01": "value-01"}), + WithMaxBatchCallback(1, func(pts []*point.Point) error { + count += len(pts) + return nil + }), + } + prom, err := NewProm(opts...) + assert.NoError(t, err) + + _, err = prom.text2MetricsBatch(&buf, "") + assert.NoError(t, err) + + fmt.Printf("count: %d\n", count) +} + +func BenchmarkParseMetrics(b *testing.B) { + var buf bytes.Buffer + buf.WriteString(mockHeader) + for i := 0; i < 10000; i++ { + buf.WriteString(fmt.Sprintf(mockBody, i, i, i, i, i)) + } + + opts := []PromOption{ + WithMeasurementName("testing-meas"), + WithTags(map[string]string{"key-01": "value-01"}), + WithMaxBatchCallback(1, func(pts []*point.Point) error { + return nil + }), + } + prom, err := NewProm(opts...) + assert.NoError(b, err) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = prom.text2MetricsBatch(&buf, "") + assert.NoError(b, err) + } +} + func TestGetNamesByDefault(t *testing.T) { cases := []struct { inName string diff --git a/internal/promscrape/options.go b/internal/promscrape/options.go new file mode 100644 index 0000000000..374390233a --- /dev/null +++ b/internal/promscrape/options.go @@ -0,0 +1,113 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. 
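
As a rough performance check for the parser changes, the TestParseMetrics/BenchmarkParseMetrics cases added above can be run from the repository root with the standard Go tooling:

    go test -bench=ParseMetrics -benchmem ./internal/prom/
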
+ +package promscrape + +import ( + "fmt" + "time" + + "github.com/GuanceCloud/cliutils/point" + dknet "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/net" +) + +type option struct { + optionClientConn + + source string + measurement string + keepExistMetricName bool + + extraTags map[string]string + callback func([]*point.Point) error +} + +type optionClientConn struct { + timeout time.Duration + keepAlive time.Duration + + tlsOpen bool + cacertFiles []string + certFile string + keyFile string + insecureSkipVerify bool + tlsClientConfig *dknet.TLSClientConfig + + headers map[string]string +} + +type Option func(opt *option) + +var discardPointsFn = func([]*point.Point) error { + return fmt.Errorf("discard points") +} + +func defaultOption() *option { + return &option{ + optionClientConn: optionClientConn{ + timeout: time.Second * 10, + headers: make(map[string]string), + }, + extraTags: make(map[string]string), + callback: discardPointsFn, + } +} + +func WithSource(str string) Option { return func(opt *option) { opt.source = str } } +func WithMeasurement(str string) Option { return func(opt *option) { opt.measurement = str } } +func KeepExistMetricName(b bool) Option { + return func(opt *option) { opt.keepExistMetricName = b } +} + +func WithTimeout(dur time.Duration) Option { + return func(opt *option) { + if dur > 0 { + opt.timeout = dur + } + } +} + +func WithKeepAlive(dur time.Duration) Option { + return func(opt *option) { + if dur > 0 { + opt.keepAlive = dur + } + } +} + +func WithTLSOpen(b bool) Option { return func(opt *option) { opt.tlsOpen = b } } +func WithCacertFiles(arr []string) Option { return func(opt *option) { opt.cacertFiles = arr } } +func WithCertFile(str string) Option { return func(opt *option) { opt.certFile = str } } +func WithKeyFile(str string) Option { return func(opt *option) { opt.keyFile = str } } + +func WithTLSClientConfig(t *dknet.TLSClientConfig) Option { + return func(opt *option) { opt.tlsClientConfig = t } +} + +func WithInsecureSkipVerify(b bool) Option { + return func(opt *option) { opt.insecureSkipVerify = b } +} + +func WithBearerToken(str string) Option { + return func(opt *option) { + opt.headers["Authorization"] = "Bearer " + str + } +} + +func WithExtraTags(m map[string]string) Option { + return func(opt *option) { + for k, v := range m { + opt.extraTags[k] = v + } + } +} + +func WithCallback(fn func([]*point.Point) error) Option { + return func(opt *option) { + if fn != nil { + opt.callback = fn + } + } +} diff --git a/internal/promscrape/parser.go b/internal/promscrape/parser.go new file mode 100644 index 0000000000..0e317c26e8 --- /dev/null +++ b/internal/promscrape/parser.go @@ -0,0 +1,256 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +package promscrape + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "strings" + "sync" + "time" +) + +const ( + // The maximum size of a single line returned by ReadLinesBlock. + maxLineSize = 256 * 1024 + + // Default size in bytes of a single block returned by ReadLinesBlock. 
+ defaultBlockSize = 64 * 1024 +) + +func ParseStream(r io.Reader, defaultTimestamp int64, isGzipped bool, callback func(rows []Row) error) error { + ctx := getStreamContext(r) + defer putStreamContext(ctx) + for ctx.Read() { + uw := getUnmarshalWork() + uw.ctx = ctx + uw.callback = callback + uw.defaultTimestamp = defaultTimestamp + uw.reqBuf, ctx.reqBuf = ctx.reqBuf, uw.reqBuf + if err := uw.Unmarshal(); err != nil { + ctx.err = err + } + putUnmarshalWork(uw) + } + return ctx.Error() +} + +var ( + unmarshalWorkPool sync.Pool + streamContextPool sync.Pool +) + +func getUnmarshalWork() *unmarshalWork { + v := unmarshalWorkPool.Get() + if v == nil { + return &unmarshalWork{} + } + return v.(*unmarshalWork) +} + +func putUnmarshalWork(uw *unmarshalWork) { + uw.reset() + unmarshalWorkPool.Put(uw) +} + +func getStreamContext(r io.Reader) *streamContext { + if v := streamContextPool.Get(); v != nil { + ctx := v.(*streamContext) + ctx.br.Reset(r) + return ctx + } + return &streamContext{ + br: bufio.NewReaderSize(r, defaultBlockSize), + } +} + +func putStreamContext(ctx *streamContext) { + ctx.reset() + streamContextPool.Put(ctx) +} + +type streamContext struct { + br *bufio.Reader + reqBuf []byte + tailBuf []byte + err error +} + +func (ctx *streamContext) Read() bool { + if ctx.err != nil { + return false + } + ctx.reqBuf, ctx.tailBuf, ctx.err = ReadLinesBlock(ctx.br, ctx.reqBuf, ctx.tailBuf) + if ctx.err != nil { + if errors.Is(ctx.err, io.EOF) { + ctx.err = fmt.Errorf("cannot read Prometheus exposition data: %w", ctx.err) + } + return false + } + return true +} + +func (ctx *streamContext) Error() error { + if errors.Is(ctx.err, io.EOF) { + return nil + } + return ctx.err +} + +func (ctx *streamContext) reset() { + ctx.br.Reset(nil) + ctx.reqBuf = ctx.reqBuf[:0] + ctx.tailBuf = ctx.tailBuf[:0] + ctx.err = nil +} + +type unmarshalWork struct { + rows Rows + ctx *streamContext + callback func(rows []Row) error + defaultTimestamp int64 + reqBuf []byte +} + +func (uw *unmarshalWork) reset() { + uw.rows.Reset() + uw.ctx = nil + uw.callback = nil + uw.defaultTimestamp = 0 + uw.reqBuf = uw.reqBuf[:0] +} + +func (uw *unmarshalWork) runCallback(rows []Row) error { + return uw.callback(rows) +} + +func (uw *unmarshalWork) Unmarshal() error { + if err := uw.rows.Unmarshal(string(uw.reqBuf)); err != nil { + return err + } + + rows := uw.rows.Rows + + defaultTimestamp := uw.defaultTimestamp + if defaultTimestamp <= 0 { + defaultTimestamp = time.Now().UnixNano() / 1e6 + } + for i := range rows { + r := &rows[i] + if r.Timestamp == 0 { + r.Timestamp = defaultTimestamp + } + } + + return uw.runCallback(rows) +} + +// ReadLinesBlock reads a block of lines delimited by '\n' from tailBuf and r into dstBuf. +// +// Trailing chars after the last newline are put into tailBuf. +// +// Returns (dstBuf, tailBuf). +// +// It is expected that read timeout on r exceeds 1 second. +func ReadLinesBlock(r io.Reader, dstBuf, tailBuf []byte) ([]byte, []byte, error) { + return ReadLinesBlockExt(r, dstBuf, tailBuf, maxLineSize) +} + +// ReadLinesBlockExt reads a block of lines delimited by '\n' from tailBuf and r into dstBuf. +// +// Trailing chars after the last newline are put into tailBuf. +// +// Returns (dstBuf, tailBuf). +// +// maxLineLen limits the maximum length of a single line. +// +// It is expected that read timeout on r exceeds 1 second. 
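
A usage sketch for the streaming entry point above. ParseStream, Row and the callback signature are as defined in this file; the sample payload is invented. Rows without an explicit timestamp are stamped with defaultTimestamp, or with the current time in milliseconds when defaultTimestamp <= 0:

    package main

    import (
        "fmt"
        "strings"

        "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape"
    )

    func main() {
        body := "http_requests_total{code=\"200\",path=\"/api\"} 1027\n" +
            "temperature_celsius 23.5\n"
        // isGzipped is false: the reader already holds plain exposition text.
        err := promscrape.ParseStream(strings.NewReader(body), 0, false,
            func(rows []promscrape.Row) error {
                for _, r := range rows {
                    fmt.Printf("%s tags=%v value=%v ts=%d\n",
                        r.Metric, r.Tags, r.Value, r.Timestamp)
                }
                return nil
            })
        if err != nil {
            panic(err)
        }
    }
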
+func ReadLinesBlockExt(r io.Reader, dstBuf, tailBuf []byte, maxLineLen int) ([]byte, []byte, error) { + if cap(dstBuf) < defaultBlockSize { + dstBuf = ResizeNoCopyNoOverallocate(dstBuf, defaultBlockSize) + } + dstBuf = append(dstBuf[:0], tailBuf...) + tailBuf = tailBuf[:0] +again: + n, err := r.Read(dstBuf[len(dstBuf):cap(dstBuf)]) + // Check for error only if zero bytes read from r, i.e. no forward progress made. + // Otherwise process the read data. + if n == 0 { + if err == nil { + return dstBuf, tailBuf, fmt.Errorf("no forward progress made") + } + isEOF := isEOFLikeError(err) + if isEOF && len(dstBuf) > 0 { + // Missing newline in the end of stream. This is OK, + // so suppress io.EOF for now. It will be returned during the next + // call to ReadLinesBlock. + // This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/60 . + return dstBuf, tailBuf, nil + } + if !isEOF { + err = fmt.Errorf("cannot read a block of data: %w", err) + } else { + err = io.EOF + } + return dstBuf, tailBuf, err + } + dstBuf = dstBuf[:len(dstBuf)+n] + + // Search for the last newline in dstBuf and put the rest into tailBuf. + nn := bytes.LastIndexByte(dstBuf[len(dstBuf)-n:], '\n') + if nn < 0 { + // Didn't found at least a single line. + if len(dstBuf) > maxLineLen { + return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineLen) + } + if cap(dstBuf) < 2*len(dstBuf) { + // Increase dsbBuf capacity, so more data could be read into it. + dstBufLen := len(dstBuf) + dstBuf = ResizeWithCopyNoOverallocate(dstBuf, 2*cap(dstBuf)) + dstBuf = dstBuf[:dstBufLen] + } + goto again + } + + // Found at least a single line. Return it. + nn += len(dstBuf) - n + tailBuf = append(tailBuf[:0], dstBuf[nn+1:]...) + dstBuf = dstBuf[:nn] + return dstBuf, tailBuf, nil +} + +func isEOFLikeError(err error) bool { + if errors.Is(err, io.EOF) { + return true + } + s := err.Error() + return strings.Contains(s, "reset by peer") +} + +// ResizeNoCopyNoOverallocate resizes b to exactly n bytes and returns the resized buffer (which may be newly allocated). +// +// If newly allocated buffer is returned then b contents isn't copied to it. +func ResizeNoCopyNoOverallocate(b []byte, n int) []byte { + if n <= cap(b) { + return b[:n] + } + return make([]byte, n) +} + +// ResizeWithCopyNoOverallocate resizes b to exactly n bytes and returns the resized buffer (which may be newly allocated). +// +// If newly allocated buffer is returned then b contents is copied to it. +func ResizeWithCopyNoOverallocate(b []byte, n int) []byte { + if n <= cap(b) { + return b[:n] + } + bNew := make([]byte, n) + copy(bNew, b) + return bNew +} diff --git a/internal/promscrape/rows.go b/internal/promscrape/rows.go new file mode 100644 index 0000000000..2cfb4acbfd --- /dev/null +++ b/internal/promscrape/rows.go @@ -0,0 +1,676 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +package promscrape + +import ( + "bytes" + "fmt" + "sort" + "strings" + "sync" + + "github.com/valyala/fastjson/fastfloat" +) + +// Rows contains parsed Prometheus rows. +type Rows struct { + Rows []Row + + tagsPool []Tag +} + +// Reset resets rs. 
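
A worked example of the block-reader semantics above (ReadLinesBlock as defined in this file; the input is invented). Complete lines land in the first return value while the trailing partial line is carried over in tailBuf; a missing final newline at EOF is tolerated, and io.EOF only surfaces on the following call:

    package main

    import (
        "errors"
        "fmt"
        "io"
        "strings"

        "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape"
    )

    func main() {
        r := strings.NewReader("a 1\nb 2\npartial")
        var dst, tail []byte
        var err error
        for {
            dst, tail, err = promscrape.ReadLinesBlock(r, dst, tail)
            if err != nil {
                if errors.Is(err, io.EOF) {
                    break
                }
                panic(err)
            }
            // First pass:  block="a 1\nb 2", tail="partial"
            // Second pass: block="partial", tail=""
            fmt.Printf("block=%q tail=%q\n", dst, tail)
        }
    }
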
+func (rs *Rows) Reset() { + // Reset items, so they can be GC'ed + + for i := range rs.Rows { + rs.Rows[i].reset() + } + rs.Rows = rs.Rows[:0] + + for i := range rs.tagsPool { + rs.tagsPool[i].reset() + } + rs.tagsPool = rs.tagsPool[:0] +} + +// Unmarshal unmarshal Prometheus exposition text rows from s. +// See https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-format-details +func (rs *Rows) Unmarshal(s string) error { + var err error + noEscapes := strings.IndexByte(s, '\\') < 0 + rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], noEscapes) + return err +} + +// Row is a single Prometheus row. +type Row struct { + Metric string + Tags []Tag + Value float64 + Timestamp int64 +} + +func (r *Row) reset() { + r.Metric = "" + r.Tags = nil + r.Value = 0 + r.Timestamp = 0 +} + +func skipTrailingComment(s string) string { + n := strings.IndexByte(s, '#') + if n < 0 { + return s + } + return s[:n] +} + +func skipLeadingWhitespace(s string) string { + // Prometheus treats ' ' and '\t' as whitespace + // according to https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-format-details + for len(s) > 0 && (s[0] == ' ' || s[0] == '\t') { + s = s[1:] + } + return s +} + +func skipTrailingWhitespace(s string) string { + // Prometheus treats ' ' and '\t' as whitespace + // according to https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-format-details + for len(s) > 0 && (s[len(s)-1] == ' ' || s[len(s)-1] == '\t') { + s = s[:len(s)-1] + } + return s +} + +func nextWhitespace(s string) int { + n := strings.IndexByte(s, ' ') + if n < 0 { + return strings.IndexByte(s, '\t') + } + n1 := strings.IndexByte(s, '\t') + if n1 < 0 || n1 > n { + return n + } + return n1 +} + +func (r *Row) unmarshal(s string, tagsPool []Tag, noEscapes bool) ([]Tag, error) { + r.reset() + s = skipLeadingWhitespace(s) + n := strings.IndexByte(s, '{') + if n >= 0 { + // Tags found. Parse them. + r.Metric = skipTrailingWhitespace(s[:n]) + s = s[n+1:] + tagsStart := len(tagsPool) + var err error + s, tagsPool, err = unmarshalTags(tagsPool, s, noEscapes) + if err != nil { + return tagsPool, fmt.Errorf("cannot unmarshal tags: %w", err) + } + if len(s) > 0 && s[0] == ' ' { + // Fast path - skip whitespace. + s = s[1:] + } + tags := tagsPool[tagsStart:] + r.Tags = tags[:len(tags):len(tags)] + } else { + // Tags weren't found. Search for value after whitespace + n = nextWhitespace(s) + if n < 0 { + return tagsPool, fmt.Errorf("missing value") + } + r.Metric = s[:n] + s = s[n+1:] + } + if len(r.Metric) == 0 { + return tagsPool, fmt.Errorf("metric cannot be empty") + } + s = skipLeadingWhitespace(s) + s = skipTrailingComment(s) + if len(s) == 0 { + return tagsPool, fmt.Errorf("value cannot be empty") + } + n = nextWhitespace(s) + if n < 0 { + // There is no timestamp. + v, err := fastfloat.Parse(s) + if err != nil { + return tagsPool, fmt.Errorf("cannot parse value %q: %w", s, err) + } + r.Value = v + return tagsPool, nil + } + // There is a timestamp. + v, err := fastfloat.Parse(s[:n]) + if err != nil { + return tagsPool, fmt.Errorf("cannot parse value %q: %w", s[:n], err) + } + r.Value = v + s = skipLeadingWhitespace(s[n+1:]) + if len(s) == 0 { + // There is no timestamp - just a whitespace after the value. 
+ return tagsPool, nil + } + // There are some whitespaces after timestamp + s = skipTrailingWhitespace(s) + ts, err := fastfloat.Parse(s) + if err != nil { + return tagsPool, fmt.Errorf("cannot parse timestamp %q: %w", s, err) + } + if ts >= -1<<31 && ts < 1<<31 { + // This looks like OpenMetrics timestamp in Unix seconds. + // Convert it to milliseconds. + // + // See https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md#timestamps + ts *= 1000 + } + r.Timestamp = int64(ts) + return tagsPool, nil +} + +// var rowsReadScrape = metrics.NewCounter(`vm_protoparser_rows_read_total{type="promscrape"}`) + +func unmarshalRows(dst []Row, s string, tagsPool []Tag, noEscapes bool) ([]Row, []Tag, error) { + var err error + // dstLen := len(dst) + for len(s) > 0 { + n := strings.IndexByte(s, '\n') + if n < 0 { + // The last line. + dst, tagsPool, err = unmarshalRow(dst, s, tagsPool, noEscapes) + if err != nil { + return nil, nil, err + } + break + } + dst, tagsPool, err = unmarshalRow(dst, s[:n], tagsPool, noEscapes) + if err != nil { + return nil, nil, err + } + s = s[n+1:] + } + // rowsReadScrape.Add(len(dst) - dstLen) + return dst, tagsPool, nil +} + +func unmarshalRow(dst []Row, s string, tagsPool []Tag, noEscapes bool) ([]Row, []Tag, error) { + if len(s) > 0 && s[len(s)-1] == '\r' { + s = s[:len(s)-1] + } + s = skipLeadingWhitespace(s) + if len(s) == 0 { + // Skip empty line + return dst, tagsPool, nil + } + if s[0] == '#' { + // Skip comment + return dst, tagsPool, nil + } + if cap(dst) > len(dst) { + dst = dst[:len(dst)+1] + } else { + dst = append(dst, Row{}) + } + r := &dst[len(dst)-1] + var err error + tagsPool, err = r.unmarshal(s, tagsPool, noEscapes) + if err != nil { + return nil, nil, fmt.Errorf("cannot unmarshal Prometheus line %q: %w", s, err) + } + return dst, tagsPool, nil +} + +// var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="prometheus"}`) + +func unmarshalTags(dst []Tag, s string, noEscapes bool) (string, []Tag, error) { + for { + s = skipLeadingWhitespace(s) + if len(s) > 0 && s[0] == '}' { + // End of tags found. + return s[1:], dst, nil + } + n := strings.IndexByte(s, '=') + if n < 0 { + return s, dst, fmt.Errorf("missing value for tag %q", s) + } + key := skipTrailingWhitespace(s[:n]) + s = skipLeadingWhitespace(s[n+1:]) + if len(s) == 0 || s[0] != '"' { + return s, dst, fmt.Errorf("expecting quoted value for tag %q; got %q", key, s) + } + value := s[1:] + if noEscapes { + // Fast path - the line has no escape chars + n = strings.IndexByte(value, '"') + if n < 0 { + return s, dst, fmt.Errorf("missing closing quote for tag value %q", s) + } + s = value[n+1:] + value = value[:n] + } else { + // Slow path - the line contains escape chars + n = findClosingQuote(s) + if n < 0 { + return s, dst, fmt.Errorf("missing closing quote for tag value %q", s) + } + value = unescapeValue(s[1:n]) + s = s[n+1:] + } + if len(key) > 0 { + // Allow empty values (len(value)==0) - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/453 + if cap(dst) > len(dst) { + dst = dst[:len(dst)+1] + } else { + dst = append(dst, Tag{}) + } + tag := &dst[len(dst)-1] + tag.Key = key + tag.Value = value + } + s = skipLeadingWhitespace(s) + if len(s) > 0 && s[0] == '}' { + // End of tags found. + return s[1:], dst, nil + } + if len(s) == 0 || s[0] != ',' { + return s, dst, fmt.Errorf("missing comma after tag %s=%q", key, value) + } + s = s[1:] + } +} + +// Tag is a Prometheus tag. 
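
Driving Rows.Unmarshal directly, as a sketch (types and methods as defined above; the input lines are invented). It also illustrates two details of Row.unmarshal: escaped tag values are unescaped on the slow path, and a trailing timestamp that fits in 32 bits is treated as OpenMetrics Unix seconds and converted to milliseconds, while rows with no timestamp keep Timestamp == 0 at this layer:

    package main

    import (
        "fmt"

        "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/promscrape"
    )

    func main() {
        var rs promscrape.Rows
        input := "up{job=\"node\",path=\"C:\\\\temp\"} 1\n" + // tag value unescapes to C:\temp
            "# comment lines are skipped\n" +
            "http_requests_total{code=\"200\"} 1027 1700000000\n" // becomes 1700000000000 ms
        if err := rs.Unmarshal(input); err != nil {
            panic(err)
        }
        for _, r := range rs.Rows {
            fmt.Println(r.Metric, r.Tags, r.Value, r.Timestamp)
        }
        rs.Reset() // rs is reusable; Reset clears the rows for pooling
    }
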
+type Tag struct { + Key string + Value string +} + +func (t *Tag) reset() { + t.Key = "" + t.Value = "" +} + +func findClosingQuote(s string) int { + if len(s) == 0 || s[0] != '"' { + return -1 + } + off := 1 + s = s[1:] + for { + n := strings.IndexByte(s, '"') + if n < 0 { + return -1 + } + if prevBackslashesCount(s[:n])%2 == 0 { + return off + n + } + off += n + 1 + s = s[n+1:] + } +} + +func unescapeValue(s string) string { + n := strings.IndexByte(s, '\\') + if n < 0 { + // Fast path - nothing to unescape + return s + } + b := make([]byte, 0, len(s)) + for { + b = append(b, s[:n]...) + s = s[n+1:] + if len(s) == 0 { + b = append(b, '\\') + break + } + // label_value can be any sequence of UTF-8 characters, but the backslash (\), double-quote ("), + // and line feed (\n) characters have to be escaped as \\, \", and \n, respectively. + // See https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md + switch s[0] { + case '\\': + b = append(b, '\\') + case '"': + b = append(b, '"') + case 'n': + b = append(b, '\n') + default: + b = append(b, '\\', s[0]) + } + s = s[1:] + n = strings.IndexByte(s, '\\') + if n < 0 { + b = append(b, s...) + break + } + } + return string(b) +} + +func appendEscapedValue(dst []byte, s string) []byte { + // label_value can be any sequence of UTF-8 characters, but the backslash (\), double-quote ("), + // and line feed (\n) characters have to be escaped as \\, \", and \n, respectively. + // See https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md + for { + n := strings.IndexAny(s, "\\\"\n") + if n < 0 { + return append(dst, s...) + } + dst = append(dst, s[:n]...) + switch s[n] { + case '\\': + dst = append(dst, "\\\\"...) + case '"': + dst = append(dst, "\\\""...) + case '\n': + dst = append(dst, "\\n"...) + } + s = s[n+1:] + } +} + +func prevBackslashesCount(s string) int { + n := 0 + for len(s) > 0 && s[len(s)-1] == '\\' { + n++ + s = s[:len(s)-1] + } + return n +} + +// GetRowsDiff returns rows from s1, which are missing in s2. +// +// The returned rows have default value 0 and have no timestamps. +func GetRowsDiff(s1, s2 string) string { + li1 := getLinesIterator() + li2 := getLinesIterator() + defer func() { + putLinesIterator(li1) + putLinesIterator(li2) + }() + li1.Init(s1) + li2.Init(s2) + if !li1.NextKey() { + return "" + } + var diff []byte + if !li2.NextKey() { + diff = appendKeys(diff, li1) + return string(diff) + } + for { + switch bytes.Compare(li1.Key, li2.Key) { + case -1: + diff = appendKey(diff, li1.Key) + if !li1.NextKey() { + return string(diff) + } + case 0: + if !li1.NextKey() { + return string(diff) + } + if !li2.NextKey() { + diff = appendKeys(diff, li1) + return string(diff) + } + case 1: + if !li2.NextKey() { + diff = appendKeys(diff, li1) + return string(diff) + } + } + } +} + +type linesIterator struct { + rows []Row + a []string + tagsPool []Tag + + // Key contains the next key after NextKey call + Key []byte +} + +var linesIteratorPool sync.Pool + +func getLinesIterator() *linesIterator { + v := linesIteratorPool.Get() + if v == nil { + return &linesIterator{} + } + return v.(*linesIterator) +} + +func putLinesIterator(li *linesIterator) { + li.a = nil + linesIteratorPool.Put(li) +} + +func (li *linesIterator) Init(s string) { + a := strings.Split(s, "\n") + sort.Strings(a) + li.a = a +} + +// NextKey advances to the next key in li. +// +// It returns true if the next key is found and Key is successfully updated. 
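+//
+// Lines that fail to parse are skipped here; their errors are reported
+// later, during the real data parsing.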
+func (li *linesIterator) NextKey() bool {
+	for {
+		if len(li.a) == 0 {
+			return false
+		}
+		// Do not process errors here, since they will be logged during the real data parsing later.
+		li.rows, li.tagsPool, _ = unmarshalRow(li.rows[:0], li.a[0], li.tagsPool[:0], false)
+		li.a = li.a[1:]
+		if len(li.rows) > 0 {
+			li.Key = marshalMetricNameWithTags(li.Key[:0], &li.rows[0])
+			return true
+		}
+	}
+}
+
+func appendKey(dst, key []byte) []byte {
+	dst = append(dst, key...)
+	dst = append(dst, " 0\n"...)
+	return dst
+}
+
+func appendKeys(dst []byte, li *linesIterator) []byte {
+	for {
+		dst = appendKey(dst, li.Key)
+		if !li.NextKey() {
+			return dst
+		}
+	}
+}
+
+func marshalMetricNameWithTags(dst []byte, r *Row) []byte {
+	dst = append(dst, r.Metric...)
+	if len(r.Tags) == 0 {
+		return dst
+	}
+	dst = append(dst, '{')
+	for i, t := range r.Tags {
+		dst = append(dst, t.Key...)
+		dst = append(dst, `="`...)
+		dst = appendEscapedValue(dst, t.Value)
+		dst = append(dst, '"')
+		if i+1 < len(r.Tags) {
+			dst = append(dst, ',')
+		}
+	}
+	dst = append(dst, '}')
+	return dst
+}
+
+// AreIdenticalSeriesFast returns true if s1 and s2 contain identical Prometheus series with possibly different values.
+//
+// This function is optimized for speed.
+func AreIdenticalSeriesFast(s1, s2 string) bool {
+	for {
+		if len(s1) == 0 {
+			// The end of s1 is reached.
+			return len(s2) == 0
+		}
+		if len(s2) == 0 {
+			// The end of s2 is reached, while s1 still has contents.
+			return false
+		}
+
+		// Extract the next pair of lines from s1 and s2.
+		var x1, x2 string
+		n1 := strings.IndexByte(s1, '\n')
+		if n1 < 0 {
+			x1 = s1
+			s1 = ""
+		} else {
+			x1 = s1[:n1]
+			s1 = s1[n1+1:]
+		}
+		if n := strings.IndexByte(x1, '#'); n >= 0 {
+			// Drop comment.
+			x1 = x1[:n]
+		}
+		n2 := strings.IndexByte(s2, '\n')
+		if n2 < 0 {
+			if n1 >= 0 {
+				return false
+			}
+			x2 = s2
+			s2 = ""
+		} else {
+			if n1 < 0 {
+				return false
+			}
+			x2 = s2[:n2]
+			s2 = s2[n2+1:]
+		}
+		if n := strings.IndexByte(x2, '#'); n >= 0 {
+			// Drop comment.
+			x2 = x2[:n]
+		}
+
+		// Skip whitespace in front of the lines.
+		for len(x1) > 0 && x1[0] == ' ' {
+			if len(x2) == 0 || x2[0] != ' ' {
+				return false
+			}
+			x1 = x1[1:]
+			x2 = x2[1:]
+		}
+		if len(x1) == 0 {
+			// The end of x1 is reached.
+			if len(x2) != 0 {
+				return false
+			}
+			continue
+		}
+		if len(x2) == 0 {
+			// The end of x2 is reached, while x1 still has contents.
+			return false
+		}
+		// Compare metric names.
+		n := strings.IndexByte(x1, ' ')
+		if n < 0 {
+			// Invalid Prometheus line - it must contain at least a single space between metric name and value.
+			// Compare it in full with x2.
+			n = len(x1) - 1
+		}
+		n++
+		if n > len(x2) || x1[:n] != x2[:n] {
+			// Metric names mismatch.
+			return false
+		}
+		x1 = x1[n:]
+		x2 = x2[n:]
+
+		// The space could belong to the metric name in the following cases:
+		// foo {bar="baz"} 1
+		// foo{ bar="baz"} 2
+		// foo{bar="baz", aa="b"} 3
+		// foo{bar="b az"} 4
+		// foo 5
+		// Continue comparing the remaining parts until space or newline.
+		for {
+			n1 := strings.IndexByte(x1, ' ')
+			if n1 < 0 {
+				// Fast path.
+				// Treat x1 as a value.
+				// Skip values at x1 and x2.
+				n2 := strings.IndexByte(x2, ' ')
+				if n2 >= 0 {
+					// x2 contains additional parts.
+					return false
+				}
+				break
+			}
+			n1++
+			// Slow path.
+			// The x1[:n1] can be either a part of the metric name or a value if a timestamp is present:
+			// foo 12 34
+			if isNumeric(x1[:n1-1]) {
+				// Skip numeric part (most likely a value before timestamp) in x1 and x2
+				n2 := strings.IndexByte(x2, ' ')
+				if n2 < 0 {
+					// x2 contains fewer parts than x1
+					return false
+				}
+				n2++
+				if !isNumeric(x2[:n2-1]) {
+					// x1 contains numeric part, while x2 contains non-numeric part
+					return false
+				}
+				x1 = x1[n1:]
+				x2 = x2[n2:]
+			} else {
+				// The non-numeric part from x1 must match the corresponding part from x2.
+				if n1 > len(x2) || x1[:n1] != x2[:n1] {
+					// Parts mismatch
+					return false
+				}
+				x1 = x1[n1:]
+				x2 = x2[n1:]
+			}
+		}
+	}
+}
+
+func isNumeric(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if numericChars[s[i]] {
+			continue
+		}
+		if i == 0 && s == "NaN" || s == "nan" || s == "Inf" || s == "inf" {
+			return true
+		}
+		if i == 1 && (s[0] == '-' || s[0] == '+') && (s[1:] == "Inf" || s[1:] == "inf") {
+			return true
+		}
+		return false
+	}
+	return true
+}
+
+var numericChars = [256]bool{
+	'0': true,
+	'1': true,
+	'2': true,
+	'3': true,
+	'4': true,
+	'5': true,
+	'6': true,
+	'7': true,
+	'8': true,
+	'9': true,
+	'-': true,
+	'+': true,
+	'e': true,
+	'E': true,
+	'.': true,
+}
diff --git a/internal/promscrape/scraper.go b/internal/promscrape/scraper.go
new file mode 100644
index 0000000000..f3d31e1873
--- /dev/null
+++ b/internal/promscrape/scraper.go
@@ -0,0 +1,154 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the MIT License.
+// This product includes software developed at Guance Cloud (https://www.guance.com/).
+// Copyright 2021-present Guance, Inc.
+
+// Package promscrape is used to parse Prometheus exporter metrics.
+package promscrape
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/GuanceCloud/cliutils/point"
+
+	"gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/httpcli"
+	dknet "gitlab.jiagouyun.com/cloudcare-tools/datakit/internal/net"
+)
+
+type PromScraper struct {
+	opt    *option
+	client *http.Client
+}
+
+func NewPromScraper(opts ...Option) (*PromScraper, error) {
+	opt := defaultOption()
+	for _, fn := range opts {
+		fn(opt)
+	}
+
+	client, err := buildHTTPClient(&opt.optionClientConn)
+	if err != nil {
+		return nil, err
+	}
+
+	return &PromScraper{
+		opt:    opt,
+		client: client,
+	}, nil
+}
+
+func buildHTTPClient(opt *optionClientConn) (*http.Client, error) {
+	clientOpts := httpcli.NewOptions()
+	clientOpts.DialTimeout = opt.timeout
+	clientOpts.DialKeepAlive = opt.keepAlive
+	clientOpts.MaxIdleConns = 1
+	clientOpts.MaxIdleConnsPerHost = 10
+
+	if opt.tlsOpen {
+		tlsconfig := dknet.TLSClientConfig{
+			CaCerts:            opt.cacertFiles,
+			Cert:               opt.certFile,
+			CertKey:            opt.keyFile,
+			InsecureSkipVerify: false,
+		}
+		conf, err := tlsconfig.TLSConfig()
+		if err != nil {
+			return nil, fmt.Errorf("could not load tlsConfig: %w", err)
+		}
+		clientOpts.TLSClientConfig = conf
+	}
+
+	return httpcli.Cli(clientOpts), nil
+}
+
+func (p *PromScraper) ScrapeURL(u string) error {
+	req, err := p.newRequest(u)
+	if err != nil {
+		return err
+	}
+
+	resp, err := p.client.Do(req)
+	if err != nil {
+		return err
+	}
+	// Always close the body, even on a non-200 response, to avoid leaking the connection.
+	defer resp.Body.Close() //nolint
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("unexpected status code returned when scraping %q: %d", u, resp.StatusCode)
+	}
+
+	return p.ParserStream(resp.Body)
+}
+
+func (p *PromScraper) ParserStream(in io.Reader) error {
+	defaultTimestamp := time.Unix(0, 0).UnixNano() / 1e6
+	isGzipped := false
+
+	return ParseStream(in, defaultTimestamp, isGzipped, p.callbackForRow)
+}
+
+func (p *PromScraper) callbackForRow(rows []Row) error {
+	var pts []*point.Point
+	opts := point.DefaultMetricOptions()
+
+	for _, row := range rows {
+		measurementName, metricsName := p.splitMetricsName(row.Metric)
+		var kvs point.KVs
+		kvs = kvs.Add(metricsName, row.Value, false, true)
+
+		for key, value := range p.opt.extraTags {
+			kvs = kvs.AddTag(key, value)
+		}
+		for _, tag := range row.Tags {
+			kvs = kvs.AddTag(tag.Key, tag.Value)
+		}
+
+		pts = append(pts, point.NewPointV2(measurementName, kvs, opts...))
+	}
+
+	return p.opt.callback(pts)
+}
+
+func (p *PromScraper) newRequest(u string) (*http.Request, error) {
+	req, err := http.NewRequest("GET", u, nil)
+	if err != nil {
+		return nil, err
+	}
+	for k, v := range p.opt.headers {
+		req.Header.Set(k, v)
+	}
+	req.Header.Set("Accept", "text/plain;version=0.0.4;q=1,*/*;q=0.1")
+	return req, nil
+}
+
+func (p *PromScraper) splitMetricsName(name string) (measurementName, metricsName string) {
+	if p.opt.measurement != "" {
+		return p.opt.measurement, name
+	}
+
+	startPosition := strings.IndexFunc(name, func(r rune) bool {
+		return r != '_'
+	})
+	if startPosition == -1 || startPosition == len(name)-1 {
+		return "unknown", "unknown"
+	}
+
+	name = name[startPosition:]
+	// By default, the measurement name and the metric name are split at the first '_' encountered,
+	// e.g. "datakit_http_worker_number" becomes measurement "datakit" and field "http_worker_number".
+	index := strings.Index(name, "_")
+
+	switch index {
+	case -1:
+		return name, name
+	case 0:
+		return name[index:], name[index:]
+	case len(name) - 1:
+		return name[:index], name[:index]
+	}
+
+	// If keepExistMetricName is true, keep the raw metric name for the field name.
+	if p.opt.keepExistMetricName {
+		return name[:index], name
+	}
+	return name[:index], name[index+1:]
+}
diff --git a/internal/promscrape/scraper_test.go b/internal/promscrape/scraper_test.go
new file mode 100644
index 0000000000..ee043a82ff
--- /dev/null
+++ b/internal/promscrape/scraper_test.go
@@ -0,0 +1,79 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the MIT License.
+// This product includes software developed at Guance Cloud (https://www.guance.com/).
+// Copyright 2021-present Guance, Inc.
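+
+// A minimal usage sketch for this package (the functional options accepted by
+// NewPromScraper live in options.go and are not shown in this diff, so the
+// sketch builds the option struct directly, as the tests below do):
+//
+//	p := &PromScraper{opt: &option{
+//		measurement: "example",
+//		callback:    func(pts []*point.Point) error { return nil },
+//	}}
+//	_ = p.ParserStream(strings.NewReader("up 1\n"))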
+
+package promscrape
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+
+	"github.com/GuanceCloud/cliutils/point"
+	"github.com/stretchr/testify/assert"
+)
+
+const (
+	mockHeader = `
+# HELP datakit_http_worker_number The number of the worker
+# TYPE datakit_http_worker_number gauge
+`
+	mockBody = `
+datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d", } 11.0
+datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d", } 12.2
+datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d", } 13.0
+datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d", } 14.2
+datakit_http_worker_number{category="metric",domain="dataway.testing.com",status="%d", } 15.0
+`
+)
+
+func TestParseStream(t *testing.T) {
+	count := 0
+	run := func() {
+		var buf bytes.Buffer
+		buf.WriteString(mockHeader)
+		for i := 0; i < 10000; i++ {
+			buf.WriteString(fmt.Sprintf(mockBody, i, i, i, i, i))
+		}
+		p := &PromScraper{
+			opt: &option{
+				measurement: "testing-meas",
+				extraTags:   map[string]string{"key-01": "value-01"},
+				callback: func(pts []*point.Point) error {
+					count += len(pts)
+					return nil
+				},
+			},
+		}
+		err := p.ParserStream(&buf)
+		assert.NoError(t, err)
+	}
+
+	run()
+	t.Logf("count: %d\n", count)
+}
+
+func BenchmarkParseStream(b *testing.B) {
+	var buf bytes.Buffer
+	buf.WriteString(mockHeader)
+	for i := 0; i < 10000; i++ {
+		buf.WriteString(fmt.Sprintf(mockBody, i, i, i, i, i))
+	}
+	// Snapshot the payload so every iteration parses the full input;
+	// reading from buf directly would drain it after the first iteration.
+	data := buf.Bytes()
+
+	p := &PromScraper{
+		opt: &option{
+			measurement: "testing-meas",
+			extraTags:   map[string]string{"key-01": "value-01"},
+			callback: func(pts []*point.Point) error {
+				return nil
+			},
+		},
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		err := p.ParserStream(bytes.NewReader(data))
+		assert.NoError(b, err)
+	}
+}
diff --git a/vendor/github.com/GuanceCloud/cliutils/pipeline/ptinput/ptwindow/pt_window.go b/vendor/github.com/GuanceCloud/cliutils/pipeline/ptinput/ptwindow/pt_window.go
index ecee68425a..01394cc14c 100644
--- a/vendor/github.com/GuanceCloud/cliutils/pipeline/ptinput/ptwindow/pt_window.go
+++ b/vendor/github.com/GuanceCloud/cliutils/pipeline/ptinput/ptwindow/pt_window.go
@@ -22,8 +22,6 @@ type PtRing struct {
 	ring []*point.Point
 	pos  int
 
-	notNil int
-
 	elemLimit int
 }
 
@@ -31,23 +29,16 @@ func (w *PtRing) put(pt *point.Point) {
 	if w.pos >= len(w.ring) {
 		w.pos = 0
 	}
+
 	if w.ring[w.pos] != nil {
-		PutbackPoints(pt)
+		PutbackPoints(w.ring[w.pos])
 	}
-
 	w.ring[w.pos] = pt
-	if pt != nil {
-		w.notNil++
-	}
+
 	w.pos++
 }
 
 func (w *PtRing) clean() []*point.Point {
-	if w.notNil == 0 {
-		return nil
-	}
-
-	w.notNil = 0
 	var r []*point.Point
 
 	for i := 0; i < len(w.ring); i++ {
diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/cfg/cfg.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/cfg/cfg.go
index 0eaaed52f2..cecfcda3bb 100644
--- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/cfg/cfg.go
+++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/cfg/cfg.go
@@ -1,3 +1,9 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the MIT License.
+// This product includes software developed at Guance Cloud (https://www.guance.com/).
+// Copyright 2021-present Guance, Inc.
+
+// Package cfg is used to control the behavior of this package.
package cfg import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/events/type.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/events/type.go index e99d068787..c1dee38b50 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/events/type.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/events/type.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package events define the pprof event types. package events import ( @@ -13,8 +19,8 @@ const ( ) const ( - CpuSamples Type = "cpu-samples" - CpuTime Type = "cpu-time" + CPUSamples Type = "cpu-samples" + CPUTime Type = "cpu-time" WallTime Type = "wall-time" HeapLiveSize Type = "heap-space" HeapLiveObjects Type = "heap-live-objects" @@ -45,7 +51,7 @@ var TypeProfileFilename = map[languages.Lang]map[Type]string{ } var Metas = map[Type]TypeMetadata{ - CpuSamples: { + CPUSamples: { Sort: sortMap{languages.Python: 0, languages.GoLang: 0, languages.NodeJS: 0}, Name: "CPU Samples", Description: descriptionMap{languages.Any: "This is the number of samples each method spent running on the CPU."}, @@ -53,7 +59,7 @@ var Metas = map[Type]TypeMetadata{ ShowPlaces: ShowNoWay, }, - CpuTime: { + CPUTime: { Sort: sortMap{languages.Python: 10, languages.GoLang: 10, languages.DotNet: 10}, //map[languages.Lang]int{languages.Python: 10, languages.GoLang: 10}, Name: "CPU Time", Description: descriptionMap{languages.Any: "This is the time each method spent running on the CPU."}, @@ -254,13 +260,9 @@ type TypeMetadata struct { type ShowPlace int -// sortMap used to generate sort map for convenience +// sortMap is used to generate a sorted map for convenience. type sortMap map[languages.Lang]int -func newSortMap() sortMap { - return make(sortMap) -} - func (sm sortMap) put(lang languages.Lang, sort int) sortMap { sm[lang] = sort return sm @@ -268,10 +270,6 @@ func (sm sortMap) put(lang languages.Lang, sort int) sortMap { type descriptionMap map[languages.Lang]string -func newDescriptionMap() descriptionMap { - return make(descriptionMap) -} - func (dm descriptionMap) put(lang languages.Lang, desc string) descriptionMap { dm[lang] = desc return dm diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/languages/lang.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/languages/lang.go index b0b5511180..381af4252a 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/languages/lang.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/languages/lang.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package languages define the supported languages. 
package languages import "strings" diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/parameter/parameter.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/parameter/parameter.go index 85b7b0e510..49e565a686 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/parameter/parameter.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/parameter/parameter.go @@ -1,8 +1,15 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package parameter defines the parsing parameters. package parameter import ( "errors" "fmt" + "github.com/GuanceCloud/cliutils/pprofparser/domain/languages" "github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit" ) diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/frame.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/frame.go index b11f630b60..1fb6fd9cb5 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/frame.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/frame.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package pprof import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/summary.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/summary.go index 6702343e89..21e26b1361 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/summary.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/pprof/summary.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package pprof declares the pprof util methods. package pprof import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/kind.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/kind.go index ca205ff73c..ebea7be8d5 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/kind.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/kind.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package quantity defines the unit and quantity kinds. package quantity var ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/quantity.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/quantity.go index 4f349687f3..1b2a11179f 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/quantity.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/quantity.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). 
+// Copyright 2021-present Guance, Inc. + package quantity import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/unit.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/unit.go index 46a552a84c..8a147c400c 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/unit.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/quantity/unit.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package quantity import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/tracing/tracing.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/tracing/tracing.go index 16c2509a80..0bc07fdb24 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/tracing/tracing.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/domain/tracing/tracing.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package tracing defines the trace and span entities. package tracing var ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/aggregators.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/aggregators.go index c34ab6e345..7c62300428 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/aggregators.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/aggregators.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package parsing import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/collapse.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/collapse.go index 8ed189a339..4f32bc2718 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/collapse.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/collapse.go @@ -1,9 +1,13 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. 
+ package parsing import ( "bufio" "fmt" - "github.com/GuanceCloud/cliutils/pprofparser/domain/tracing" "os" "regexp" "strconv" @@ -13,6 +17,7 @@ import ( "github.com/GuanceCloud/cliutils/pprofparser/domain/parameter" "github.com/GuanceCloud/cliutils/pprofparser/domain/pprof" "github.com/GuanceCloud/cliutils/pprofparser/domain/quantity" + "github.com/GuanceCloud/cliutils/pprofparser/domain/tracing" "github.com/GuanceCloud/cliutils/pprofparser/service/storage" "github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit" "github.com/GuanceCloud/cliutils/pprofparser/tools/logtoolkit" @@ -57,18 +62,18 @@ func summary(filename string) (map[events.Type]*EventSummary, error) { if err != nil { return nil, fmt.Errorf("open profile file [%s] fail: %w", filename, err) } - defer f.Close() + defer f.Close() //nolint:errcheck sampleSummary := &EventSummary{ SummaryValueType: &SummaryValueType{ - Type: events.CpuSamples, + Type: events.CPUSamples, Unit: quantity.CountUnit, }, Value: 0, } spySummaries := map[events.Type]*EventSummary{ - events.CpuSamples: sampleSummary, + events.CPUSamples: sampleSummary, } scanner := bufio.NewScanner(f) @@ -144,7 +149,7 @@ func (p *Collapse) ResolveFlameGraph(_ events.Type) (*pprof.Frame, AggregatorSel if err != nil { return nil, nil, fmt.Errorf("open py-spy profile file fail: %w", err) } - defer f.Close() + defer f.Close() //nolint:errcheck scanner := bufio.NewScanner(f) @@ -305,7 +310,7 @@ func ParseRawFlameGraph(filename string) (*pprof.Frame, AggregatorSelectSlice, e if err != nil { return nil, nil, fmt.Errorf("open py-spy profile file fail: %w", err) } - defer f.Close() + defer f.Close() //nolint:errcheck scanner := bufio.NewScanner(f) diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/display.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/display.go index 09165a881c..fd95122a36 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/display.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/display.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package parsing import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/metadata.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/metadata.go index 038e7bf9e8..15fca597f3 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/metadata.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/metadata.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package parsing import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/parser.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/parser.go index b471aeee3f..345c0d58a6 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/parser.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/parser.go @@ -1,7 +1,13 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. 
+// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package parsing import ( "fmt" + "github.com/GuanceCloud/cliutils/pprofparser/domain/events" "github.com/GuanceCloud/cliutils/pprofparser/domain/languages" "github.com/GuanceCloud/cliutils/pprofparser/domain/parameter" diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/pprof.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/pprof.go index d70a16ca61..b81edca4fd 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/pprof.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/pprof.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package parsing is the core pprof parsing package. package parsing import ( @@ -304,14 +310,14 @@ func (p *PProf) Summary() (map[events.Type]*EventSummary, int64, error) { // 需要进行span过滤 if p.filterBySpan { spanID := parsetoolkit.GetStringLabel(sample, LabelSpanID) - rootSpanId := parsetoolkit.GetStringLabel(sample, LabelLocalRootSpanID) + rootSpanID := parsetoolkit.GetStringLabel(sample, LabelLocalRootSpanID) // 没有spanID的数据去掉 if spanID == "" { continue } if p.spanIDSet != nil { if p.spanIDSet == tracing.AllTraceSpanSet { - if rootSpanId != p.span.SpanID { + if rootSpanID != p.span.SpanID { continue } } else if !p.spanIDSet.Contains(spanID) { @@ -352,7 +358,7 @@ func parseAndClose(r io.Reader) (*profile.Profile, error) { } if closable, ok := r.(io.Closer); ok { - defer closable.Close() + defer closable.Close() // nolint:errcheck } goPprof, err := profile.Parse(r) @@ -428,12 +434,12 @@ func (p *PProf) ResolveFlameGraph(eventType events.Type) (*pprof.Frame, Aggregat // span 过滤,必须有spanID的才显示 if p.filterBySpan { spanID := parsetoolkit.GetStringLabel(smp, LabelSpanID) - rootSpanId := parsetoolkit.GetStringLabel(smp, LabelLocalRootSpanID) + rootSpanID := parsetoolkit.GetStringLabel(smp, LabelLocalRootSpanID) if spanID == "" { continue } if p.spanIDSet == tracing.AllTraceSpanSet { - if rootSpanId != p.span.SpanID { + if rootSpanID != p.span.SpanID { continue } } else if p.spanIDSet != nil { diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/sampletype.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/sampletype.go index bbf543aaae..27451f2dc8 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/sampletype.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/parsing/sampletype.go @@ -1,7 +1,13 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. 
+ package parsing import ( "fmt" + "github.com/GuanceCloud/cliutils/pprofparser/domain/events" "github.com/GuanceCloud/cliutils/pprofparser/domain/languages" ) @@ -43,8 +49,8 @@ type fileSampleTypesMap map[string]map[string]events.Type var pyPprofTypeMaps = fileSampleTypesMap{ "prof|auto|*.pprof": { - // "cpu-samples": events.CpuSamples, - "cpu-time": events.CpuTime, + // "cpu-samples": events.CPUSamples, + "cpu-time": events.CPUTime, "wall-time": events.WallTime, "exception-samples": events.ThrownExceptions, "lock-acquire": events.LockAcquires, @@ -58,7 +64,7 @@ var pyPprofTypeMaps = fileSampleTypesMap{ } var dotnetPProfEventMaps = fileSampleTypesMap{ "prof|auto|*.pprof": { - "cpu": events.CpuTime, + "cpu": events.CPUTime, "exception": events.ThrownExceptions, "alloc-samples": events.Allocations, "alloc-size": events.AllocatedMemory, @@ -72,8 +78,8 @@ var dotnetPProfEventMaps = fileSampleTypesMap{ var phpEventMaps = fileSampleTypesMap{ "prof|auto|*.pprof": { - "cpu-time": events.CpuTime, - "sample": events.CpuSamples, + "cpu-time": events.CPUTime, + "sample": events.CPUSamples, "wall-time": events.WallTime, "alloc-samples": events.Allocations, "alloc-size": events.AllocatedMemory, @@ -82,7 +88,7 @@ var phpEventMaps = fileSampleTypesMap{ var nodejsEventMaps = fileSampleTypesMap{ "cpu.pprof": { - "": events.CpuSamples, + "": events.CPUSamples, }, "inuse_objects.pprof": { @@ -96,7 +102,7 @@ var nodejsEventMaps = fileSampleTypesMap{ var goPprofTypeMaps = fileSampleTypesMap{ "cpu.pprof|*cpu.pprof*": { - "cpu": events.CpuTime, + "cpu": events.CPUTime, }, "delta-heap.pprof|*delta-heap.pprof*": { "alloc_objects": events.Allocations, diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/disk.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/disk.go index 2e5f2246ae..2934b398b7 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/disk.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/disk.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package storage import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/oss.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/oss.go index def14138b1..a2a0633584 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/oss.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/oss.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package storage import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/storage.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/storage.go index 59ae6f66fb..81c766513f 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/storage.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/service/storage/storage.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). 
+// Copyright 2021-present Guance, Inc. + +// Package storage implements a series of storage kinds. package storage import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/format.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/format.go index 3ee033e7c9..dfb3e5a05c 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/format.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/format.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package filepathtoolkit is a collection of filepath utils. package filepathtoolkit import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/stat.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/stat.go index ff23436355..d1d62608aa 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/stat.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/filepathtoolkit/stat.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package filepathtoolkit import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/cast.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/cast.go index d11ee142e8..b7dcbef94c 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/cast.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/cast.go @@ -1,3 +1,8 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + package jsontoolkit import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/json.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/json.go index a5aeff3b11..ad58a47fd4 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/json.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/jsontoolkit/json.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package jsontoolkit is a collection of json utils. package jsontoolkit import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/logtoolkit/logger.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/logtoolkit/logger.go index c034491541..4873d842be 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/logtoolkit/logger.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/logtoolkit/logger.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. 
+ +// Package logtoolkit is a collection of log utils. package logtoolkit import ( diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/mathtoolkit/math.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/mathtoolkit/math.go index d7c416d0f3..e25226c881 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/mathtoolkit/math.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/mathtoolkit/math.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package mathtoolkit is a collection of math utils. package mathtoolkit import "math" diff --git a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/parsetoolkit/pprof.go b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/parsetoolkit/pprof.go index 6df4bb9aa7..a187887bb9 100644 --- a/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/parsetoolkit/pprof.go +++ b/vendor/github.com/GuanceCloud/cliutils/pprofparser/tools/parsetoolkit/pprof.go @@ -1,3 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the MIT License. +// This product includes software developed at Guance Cloud (https://www.guance.com/). +// Copyright 2021-present Guance, Inc. + +// Package parsetoolkit is a collection of parsing utils. package parsetoolkit import ( diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index 7a008a4d23..a22953805c 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -3,7 +3,6 @@ before: hooks: - ./gen.sh - - go install mvdan.cc/garble@v0.9.3 builds: - @@ -32,7 +31,6 @@ builds: - mips64le goarm: - 7 - gobinary: garble - id: "s2d" binary: s2d @@ -59,7 +57,6 @@ builds: - mips64le goarm: - 7 - gobinary: garble - id: "s2sx" binary: s2sx @@ -87,21 +84,11 @@ builds: - mips64le goarm: - 7 - gobinary: garble archives: - id: s2-binaries - name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}" - replacements: - aix: AIX - darwin: OSX - linux: Linux - windows: Windows - 386: i386 - amd64: x86_64 - freebsd: FreeBSD - netbsd: NetBSD + name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" format_overrides: - goos: windows format: zip @@ -125,7 +112,7 @@ changelog: nfpms: - - file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" vendor: Klaus Post homepage: https://github.com/klauspost/compress maintainer: Klaus Post @@ -134,8 +121,3 @@ nfpms: formats: - deb - rpm - replacements: - darwin: Darwin - linux: Linux - freebsd: FreeBSD - amd64: x86_64 diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 4002a16a63..05c7359e48 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,54 @@ This package provides various compression algorithms. 
# changelog +* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) + * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 + * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 + +* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5) + * flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912 + * zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908 + * zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913 + * zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910 + * s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917 +https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918 + +* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4) + * huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887 + * huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886 + * gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892 + * gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890 + * gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891 + +* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3) + * fse: Fix max header size https://github.com/klauspost/compress/pull/881 + * zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877 + * gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883 + +* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2) + * zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876 + +* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1) + * s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871 + * flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869 + * s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867 + +* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0) + * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853 + * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838 + * flate: Add limited window compression https://github.com/klauspost/compress/pull/843 + * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839 + * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837 + * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860 + +
+<details>
+	<summary>See changes to v1.16.x</summary>
+
+
+* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
+	* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
+	* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
+
 * June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
 	* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
 	* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
@@ -49,7 +97,11 @@ This package provides various compression algorithms.
 * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
 * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
 * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
+
+</details>
+
+<details>
+	<summary>See changes to v1.15.x</summary>
+
 * Jan 21st, 2023 (v1.15.15)
 	* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
 	* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
@@ -176,6 +228,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati
 While the release has been extensively tested, it is recommended to testing when upgrading.
 
+</details>
+
See changes to v1.14.x @@ -511,6 +565,8 @@ the stateless compress described below. For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). +To disable all assembly add `-tags=noasm`. This works across all packages. + # Stateless compression This package offers stateless compression as a special option for gzip/deflate. @@ -529,7 +585,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: -``` +```go // replace 'ioutil.Discard' with your output. gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) if err != nil { @@ -636,6 +692,8 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv * [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. * [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression. * [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression. +* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index. +* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor. # license diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 5faea0b2b3..66d1657d2c 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -7,6 +7,7 @@ package flate import ( "encoding/binary" + "errors" "fmt" "io" "math" @@ -211,7 +212,7 @@ func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { // Should only be used after a start/reset. func (d *compressor) fillWindow(b []byte) { // Do not fill window if we are in store-only or huffman mode. - if d.level <= 0 { + if d.level <= 0 && d.level > -MinCustomWindowSize { return } if d.fast != nil { @@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.initDeflate() d.fill = (*compressor).fillDeflate d.step = (*compressor).deflateLazy + case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: + d.w.logNewTablePenalty = 7 + d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast default: return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } @@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { return zw, err } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = 32 + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = windowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. 
+func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("flate: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") + } + var dw Writer + if err := dw.d.init(w, -windowSize); err != nil { + return nil, err + } + return &dw, nil +} + // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 24caf5f70b..c8124b5c49 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -8,7 +8,6 @@ package flate import ( "encoding/binary" "fmt" - "math/bits" ) type fastEnc interface { @@ -192,25 +191,3 @@ func (e *fastGen) Reset() { } e.hist = e.hist[:0] } - -// matchLen returns the maximum length. -// 'a' must be the shortest of the two. -func matchLen(a, b []byte) int { - var checked int - - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] - } - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 414c0bea9f..2f410d64f5 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool { const sanity = false if h.chunks == nil { - h.chunks = &[huffmanNumChunks]uint16{} + h.chunks = new([huffmanNumChunks]uint16) } + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} } @@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { } h.maxRead = min + chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { if cap(h.links[off]) < numLinks { h.links[off] = make([]uint16, numLinks) } else { - links := h.links[off][:0] - h.links[off] = links[:numLinks] + h.links[off] = h.links[off][:numLinks] } } } else { @@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return true } -// The actual read interface needed by NewReader. +// Reader is the actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. type Reader interface { @@ -285,6 +286,18 @@ type Reader interface { io.ByteReader } +type step uint8 + +const ( + copyData step = iota + 1 + nextBlock + huffmanBytesBuffer + huffmanBytesReader + huffmanBufioReader + huffmanStringsReader + huffmanGenericReader +) + // Decompress state. type decompressor struct { // Input source. @@ -303,7 +316,7 @@ type decompressor struct { // Next step in the decompression, // and decompression state. 
- step func(*decompressor) + step step stepState int err error toRead []byte @@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("predefinied huffman block") } @@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("dynamic huffman block") } @@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) { if f.err != nil { return 0, f.err } - f.step(f) + + f.doStep() + if f.err != nil && len(f.toRead) == 0 { f.toRead = f.dict.readFlush() // Flush what's left in case of error } } } -// Support the io.WriteTo interface for io.Copy and friends. +// WriteTo implements the io.WriteTo interface for io.Copy and friends. func (f *decompressor) WriteTo(w io.Writer) (int64, error) { total := int64(0) flushed := false @@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) { return total, f.err } if f.err == nil { - f.step(f) + f.doStep() } if len(f.toRead) == 0 && f.err != nil && !flushed { f.toRead = f.dict.readFlush() // Flush what's left in case of error @@ -631,7 +646,7 @@ func (f *decompressor) copyData() { if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData + f.step = copyData return } f.finishBlock() @@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() { } f.err = io.EOF } - f.step = (*decompressor).nextBlock + f.step = nextBlock +} + +func (f *decompressor) doStep() { + switch f.step { + case copyData: + f.copyData() + case nextBlock: + f.nextBlock() + case huffmanBytesBuffer: + f.huffmanBytesBuffer() + case huffmanBytesReader: + f.huffmanBytesReader() + case huffmanBufioReader: + f.huffmanBufioReader() + case huffmanStringsReader: + f.huffmanStringsReader() + case huffmanGenericReader: + f.huffmanGenericReader() + default: + panic("BUG: unexpected step state") + } } // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. 
@@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { h1: f.h1, h2: f.h2, dict: f.dict, - step: (*decompressor).nextBlock, + step: nextBlock, } f.dict.init(maxMatchOffset, dict) return nil @@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, nil) return &f } @@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, dict) return &f } diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 61342b6b88..2b2f993f75 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -85,7 +85,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer + f.step = huffmanBytesBuffer f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -251,7 +251,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.step = huffmanBytesBuffer // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -336,7 +336,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader + f.step = huffmanBytesReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -502,7 +502,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.step = huffmanBytesReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -587,7 +587,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader + f.step = huffmanBufioReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -753,7 +753,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.step = huffmanBufioReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -838,7 +838,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader + f.step = huffmanStringsReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1004,7 +1004,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.step = huffmanStringsReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1089,7 +1089,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader + f.step = huffmanGenericReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1255,7 +1255,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = 
dict.readFlush() - f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.step = huffmanGenericReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1265,19 +1265,19 @@ copyHistory: // Not reached } -func (f *decompressor) huffmanBlockDecoder() func() { +func (f *decompressor) huffmanBlockDecoder() { switch f.r.(type) { case *bytes.Buffer: - return f.huffmanBytesBuffer + f.huffmanBytesBuffer() case *bytes.Reader: - return f.huffmanBytesReader + f.huffmanBytesReader() case *bufio.Reader: - return f.huffmanBufioReader + f.huffmanBufioReader() case *strings.Reader: - return f.huffmanStringsReader + f.huffmanStringsReader() case Reader: - return f.huffmanGenericReader + f.huffmanGenericReader() default: - return f.huffmanGenericReader + f.huffmanGenericReader() } } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 83ef50ba45..1f61ec1829 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -308,3 +308,401 @@ emitRemainder: emitLiteral(dst, src[nextEmit:]) } } + +// fastEncL5Window is a level 5 encoder, +// but with a custom window size. +type fastEncL5Window struct { + hist []byte + cur int32 + maxOffset int32 + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + maxMatchOffset := e.maxOffset + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. 
+ cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. 
+ const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do a long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// Reset the encoding table. +func (e *fastEncL5Window) Reset() { + // We keep the same allocs, since we are compressing the same block sizes. + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+ if e.cur <= int32(bufferReset) { + e.cur += e.maxOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +func (e *fastEncL5Window) addBlock(src []byte) int32 { + // check if we have space already + maxMatchOffset := e.maxOffset + + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < int(maxMatchOffset*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// matchlen will return the match length between offsets s and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >= 0 and s < len(src). +func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets s and t in src. +// It is assumed that s > t, that t >= 0 and s < len(src). +func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go new file mode 100644 index 0000000000..4bd3885841 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go @@ -0,0 +1,16 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +// matchLen returns how many bytes match in a and b +// +// It assumes that: +// +// len(a) <= len(b) and len(a) > 0 +// +//go:noescape +func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s new file mode 100644 index 0000000000..0782b86e3d --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -0,0 +1,66 @@ +// Copied from S2 implementation.
+ +//go:build !appengine && !noasm && gc && !noasm + +#include "textflag.h" + +// func matchLen(a []byte, b []byte) int +TEXT ·matchLen(SB), NOSPLIT, $0-56 + MOVQ a_base+0(FP), AX + MOVQ b_base+24(FP), CX + MOVQ a_len+8(FP), DX + + // matchLen + XORL SI, SI + CMPL DX, $0x08 + JB matchlen_match4_standalone + +matchlen_loopback_standalone: + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone + +#ifdef GOAMD64_v3 + TZCNTQ BX, BX +#else + BSFQ BX, BX +#endif + SHRL $0x03, BX + LEAL (SI)(BX*1), SI + JMP gen_match_len_end + +matchlen_loop_standalone: + LEAL -8(DX), DX + LEAL 8(SI), SI + CMPL DX, $0x08 + JAE matchlen_loopback_standalone + +matchlen_match4_standalone: + CMPL DX, $0x04 + JB matchlen_match2_standalone + MOVL (AX)(SI*1), BX + CMPL (CX)(SI*1), BX + JNE matchlen_match2_standalone + LEAL -4(DX), DX + LEAL 4(SI), SI + +matchlen_match2_standalone: + CMPL DX, $0x02 + JB matchlen_match1_standalone + MOVW (AX)(SI*1), BX + CMPW (CX)(SI*1), BX + JNE matchlen_match1_standalone + LEAL -2(DX), DX + LEAL 2(SI), SI + +matchlen_match1_standalone: + CMPL DX, $0x01 + JB gen_match_len_end + MOVB (AX)(SI*1), BL + CMPB (CX)(SI*1), BL + JNE gen_match_len_end + INCL SI + +gen_match_len_end: + MOVQ SI, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go new file mode 100644 index 0000000000..ad5cd814b9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go @@ -0,0 +1,33 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "math/bits" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + } + + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n + +} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go index 43e463611b..e82fa3bb7b 100644 --- a/vendor/github.com/klauspost/compress/fse/bitwriter.go +++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go @@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index dac97e58a2..074018d8f9 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error { c2.flush(s.actualTableLog) c1.flush(s.actualTableLog) - return s.bw.close() + s.bw.close() + return nil } // writeCount will write the normalized histogram count to header. 
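Note on the bitWriter.close() change above (repeated in huff0 and zstd below): close can never fail, so the always-nil error return is dropped and callers such as Scratch.compress now call close() and return nil themselves. A minimal Go sketch of the pattern, with simplified addBits16Clean/flushAlign bodies standing in for the real ones:

package main

import "fmt"

// Toy model of the vendored bitWriter: close cannot fail, so it no
// longer returns an error for callers to propagate.
type bitWriter struct {
	bitContainer uint64
	nBits        uint8
	out          []byte
}

// addBits16Clean adds up to 16 pre-masked bits to the container.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
	b.bitContainer |= uint64(value) << (b.nBits & 63)
	b.nBits += bits
}

// flushAlign writes out whole bytes, padding to a byte boundary.
func (b *bitWriter) flushAlign() {
	nbBytes := (b.nBits + 7) >> 3
	for i := uint8(0); i < nbBytes; i++ {
		b.out = append(b.out, byte(b.bitContainer>>(i*8)))
	}
	b.bitContainer = 0
	b.nBits = 0
}

// close writes the end mark and flushes; note: no error result.
func (b *bitWriter) close() {
	b.addBits16Clean(1, 1) // end mark
	b.flushAlign()
}

func main() {
	var bw bitWriter
	bw.addBits16Clean(0x2a, 6)
	bw.close() // previously: if err := bw.close(); err != nil { return err }
	fmt.Printf("%#v\n", bw.out)
}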
@@ -211,7 +212,7 @@ func (s *Scratch) writeCount() error { previous0 bool charnum uint16 - maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3 // Write Table Size bitStream = uint32(tableLog - minTablelog) diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go index 6d630c390d..00a0a2c386 100644 --- a/vendor/github.com/klauspost/compress/gzip/gunzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error { *z = Reader{ decompressor: z.decompressor, multistream: true, + br: z.br, } if rr, ok := r.(flate.Reader); ok { z.r = rr @@ -237,6 +238,11 @@ func (z *Reader) readHeader() (hdr Header, err error) { } } + // Reserved FLG bits must be zero. + if flg>>5 != 0 { + return hdr, ErrHeader + } + z.digest = 0 if z.decompressor == nil { z.decompressor = flate.NewReader(z.r) diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go index 26203851bd..5bc720593e 100644 --- a/vendor/github.com/klauspost/compress/gzip/gzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) { return z, nil } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = flate.MinCustomWindowSize + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = flate.MaxCustomWindowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("gzip: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize") + } + + z := new(Writer) + z.init(w, -windowSize) + return z, nil +} + func (z *Writer) init(w io.Writer, level int) { compressor := z.compressor if level != StatelessCompression { diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index b4d7164e3f..0ebc9aaac7 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -94,10 +94,9 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go deleted file mode 100644 index 4dcab8d232..0000000000 --- a/vendor/github.com/klauspost/compress/huff0/bytereader.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package huff0 - -// byteReader provides a byte reader that reads -// little endian values from a byte stream. -// The input stream is manually advanced. 
-// The reader performs no bounds checks. -type byteReader struct { - b []byte - off int -} - -// init will initialize the reader and set the input. -func (b *byteReader) init(in []byte) { - b.b = in - b.off = 0 -} - -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - v3 := int32(b.b[b.off+3]) - v2 := int32(b.b[b.off+2]) - v1 := int32(b.b[b.off+1]) - v0 := int32(b.b[b.off]) - return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 -} - -// Uint32 returns a little endian uint32 starting at current offset. -func (b byteReader) Uint32() uint32 { - v3 := uint32(b.b[b.off+3]) - v2 := uint32(b.b[b.off+2]) - v1 := uint32(b.b[b.off+1]) - v0 := uint32(b.b[b.off]) - return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 -} - -// remain will return the number of bytes remaining. -func (b byteReader) remain() int { - return len(b.b) - b.off -} diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 4ee4fa18dd..84aa3d12f0 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err } func (s *Scratch) compress1X(src []byte) ([]byte, error) { - return s.compress1xDo(s.Out, src) + return s.compress1xDo(s.Out, src), nil } -func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { +func (s *Scratch) compress1xDo(dst, src []byte) []byte { var bw = bitWriter{out: dst} // N is length divisible by 4. @@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { bw.encTwoSymbols(cTable, tmp[1], tmp[0]) } } - err := bw.close() - return bw.out, err + bw.close() + return bw.out } var sixZeros [6]byte @@ -283,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { } src = src[len(toDo):] - var err error idx := len(s.Out) - s.Out, err = s.compress1xDo(s.Out, toDo) - if err != nil { - return nil, err - } + s.Out = s.compress1xDo(s.Out, toDo) if len(s.Out)-idx > math.MaxUint16 { // We cannot store the size in the jump table return nil, ErrIncompressible @@ -315,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { segmentSize := (len(src) + 3) / 4 var wg sync.WaitGroup - var errs [4]error wg.Add(4) for i := 0; i < 4; i++ { toDo := src @@ -326,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { // Separate goroutine for each block. go func(i int) { - s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) + s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) wg.Done() }(i) } wg.Wait() for i := 0; i < 4; i++ { - if errs[i] != nil { - return nil, errs[i] - } o := s.tmpOut[i] if len(o) > math.MaxUint16 { // We cannot store the size in the jump table @@ -358,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { // Does not update s.clearCount. func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { reuse = true + _ = s.count // Assert that s != nil to speed up the following loop. for _, v := range in { s.count[v]++ } @@ -423,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool { // minTableLog provides the minimum logSize to safely represent a distribution. 
func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBit32(uint32(s.br.remain())) + 1 + minBitsSrc := highBit32(uint32(s.srcLen)) + 1 minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 if minBitsSrc < minBitsSymbols { return uint8(minBitsSrc) @@ -435,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 { func (s *Scratch) optimalTableLog() { tableLog := s.TableLog minBits := s.minTableLog() - maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 + maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1 if maxBitsSrc < tableLog { // Accuracy can be reduced tableLog = maxBitsSrc diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index e8ad17ad08..77ecd68e0a 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -88,7 +88,7 @@ type Scratch struct { // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. MaxDecodedSize int - br byteReader + srcLen int // MaxSymbolValue will override the maximum symbol value of the next block. MaxSymbolValue uint8 @@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) { if s.fse == nil { s.fse = &fse.Scratch{} } - s.br.init(in) + s.srcLen = len(in) return s, nil } diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go index 2aa6a95a02..2754bac6f1 100644 --- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go @@ -51,7 +51,7 @@ func emitCopy(dst []byte, offset, length int) int { i := 0 // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is is a little lower (at 60 = 64 - 4), because + // length emitted down below is a little lower (at 60 = 64 - 4), because // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod index 2263853fca..5a4412f907 100644 --- a/vendor/github.com/klauspost/compress/s2sx.mod +++ b/vendor/github.com/klauspost/compress/s2sx.mod @@ -1,4 +1,4 @@ module github.com/klauspost/compress -go 1.16 +go 1.19 diff --git a/vendor/github.com/klauspost/compress/zlib/reader.go b/vendor/github.com/klauspost/compress/zlib/reader.go index f127d47767..cb652b9089 100644 --- a/vendor/github.com/klauspost/compress/zlib/reader.go +++ b/vendor/github.com/klauspost/compress/zlib/reader.go @@ -26,6 +26,7 @@ package zlib import ( "bufio" "compress/zlib" + "encoding/binary" "hash" "hash/adler32" "io" @@ -33,7 +34,10 @@ import ( "github.com/klauspost/compress/flate" ) -const zlibDeflate = 8 +const ( + zlibDeflate = 8 + zlibMaxWindow = 7 +) var ( // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. @@ -52,7 +56,7 @@ type reader struct { scratch [4]byte } -// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] // to switch to a new underlying Reader. This permits reusing a ReadCloser // instead of allocating a new one. 
type Resetter interface { @@ -63,20 +67,20 @@ type Resetter interface { // NewReader creates a new ReadCloser. // Reads from the returned ReadCloser read and decompress data from r. -// If r does not implement io.ByteReader, the decompressor may read more +// If r does not implement [io.ByteReader], the decompressor may read more // data than necessary from r. // It is the caller's responsibility to call Close on the ReadCloser when done. // -// The ReadCloser returned by NewReader also implements Resetter. +// The [io.ReadCloser] returned by NewReader also implements [Resetter]. func NewReader(r io.Reader) (io.ReadCloser, error) { return NewReaderDict(r, nil) } -// NewReaderDict is like NewReader but uses a preset dictionary. +// NewReaderDict is like [NewReader] but uses a preset dictionary. // NewReaderDict ignores the dictionary if the compressed data does not refer to it. -// If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary. +// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. // -// The ReadCloser returned by NewReaderDict also implements Resetter. +// The ReadCloser returned by NewReaderDict also implements [Resetter]. func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { z := new(reader) err := z.Reset(r, dict) @@ -108,7 +112,7 @@ func (z *reader) Read(p []byte) (int, error) { return n, z.err } // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). - checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + checksum := binary.BigEndian.Uint32(z.scratch[:4]) if checksum != z.digest.Sum32() { z.err = ErrChecksum return n, z.err @@ -116,9 +120,9 @@ func (z *reader) Read(p []byte) (int, error) { return n, io.EOF } -// Calling Close does not close the wrapped io.Reader originally passed to NewReader. +// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader]. // In order for the ZLIB checksum to be verified, the reader must be -// fully consumed until the io.EOF. +// fully consumed until the [io.EOF]. 
func (z *reader) Close() error { if z.err != nil && z.err != io.EOF { return z.err @@ -128,7 +132,7 @@ func (z *reader) Close() error { } func (z *reader) Reset(r io.Reader, dict []byte) error { - *z = reader{decompressor: z.decompressor, digest: z.digest} + *z = reader{decompressor: z.decompressor} if fr, ok := r.(flate.Reader); ok { z.r = fr } else { @@ -143,8 +147,8 @@ func (z *reader) Reset(r io.Reader, dict []byte) error { } return z.err } - h := uint(z.scratch[0])<<8 | uint(z.scratch[1]) - if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) { + h := binary.BigEndian.Uint16(z.scratch[:2]) + if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) { z.err = ErrHeader return z.err } @@ -157,7 +161,7 @@ func (z *reader) Reset(r io.Reader, dict []byte) error { } return z.err } - checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + checksum := binary.BigEndian.Uint32(z.scratch[:4]) if checksum != adler32.Checksum(dict) { z.err = ErrDictionary return z.err diff --git a/vendor/github.com/klauspost/compress/zlib/writer.go b/vendor/github.com/klauspost/compress/zlib/writer.go index 605816ba4f..cab9ef3eb0 100644 --- a/vendor/github.com/klauspost/compress/zlib/writer.go +++ b/vendor/github.com/klauspost/compress/zlib/writer.go @@ -5,6 +5,7 @@ package zlib import ( + "encoding/binary" "fmt" "hash" "hash/adler32" @@ -20,7 +21,7 @@ const ( BestSpeed = flate.BestSpeed BestCompression = flate.BestCompression DefaultCompression = flate.DefaultCompression - ConstantCompression = flate.ConstantCompression + ConstantCompression = flate.ConstantCompression // Deprecated: Use HuffmanOnly. HuffmanOnly = flate.HuffmanOnly ) @@ -40,7 +41,7 @@ type Writer struct { // NewWriter creates a new Writer. // Writes to the returned Writer are compressed and written to w. // -// It is the caller's responsibility to call Close on the WriteCloser when done. +// It is the caller's responsibility to call Close on the Writer when done. // Writes may be buffered and not flushed until Close. func NewWriter(w io.Writer) *Writer { z, _ := NewWriterLevelDict(w, DefaultCompression, nil) @@ -116,17 +117,13 @@ func (z *Writer) writeHeader() (err error) { if z.dict != nil { z.scratch[1] |= 1 << 5 } - z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31) + z.scratch[1] += uint8(31 - binary.BigEndian.Uint16(z.scratch[:2])%31) if _, err = z.w.Write(z.scratch[0:2]); err != nil { return err } if z.dict != nil { // The next four bytes are the Adler-32 checksum of the dictionary. - checksum := adler32.Checksum(z.dict) - z.scratch[0] = uint8(checksum >> 24) - z.scratch[1] = uint8(checksum >> 16) - z.scratch[2] = uint8(checksum >> 8) - z.scratch[3] = uint8(checksum >> 0) + binary.BigEndian.PutUint32(z.scratch[:], adler32.Checksum(z.dict)) if _, err = z.w.Write(z.scratch[0:4]); err != nil { return err } @@ -192,10 +189,7 @@ func (z *Writer) Close() error { } checksum := z.digest.Sum32() // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). 
- z.scratch[0] = uint8(checksum >> 24) - z.scratch[1] = uint8(checksum >> 16) - z.scratch[2] = uint8(checksum >> 8) - z.scratch[3] = uint8(checksum >> 0) + binary.BigEndian.PutUint32(z.scratch[:], checksum) _, z.err = z.w.Write(z.scratch[0:4]) return z.err } diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index bdd49c8b25..92e2347bbc 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68 ## Decompressor -Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. +Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), kindly supplied by [fuzzit.dev](https://fuzzit.dev/). diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 97299d499c..25ca983941 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -17,7 +17,6 @@ import ( // for aligning the input. type bitReader struct { in []byte - off uint // next byte to read is at in[off - 1] value uint64 // Maybe use [16]byte, but shifting is awkward. bitsRead uint8 } @@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error { return errors.New("corrupt stream: too short") } b.in = in - b.off = uint(len(in)) // The highest bit of the last byte indicates where to start v := in[len(in)-1] if v == 0 { @@ -69,21 +67,19 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // 2 bounds checks. - v := b.in[b.off-4:] - v = v[:4] + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 } // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + v := b.in[len(b.in)-8:] + b.in = b.in[:len(b.in)-8] + b.value = binary.LittleEndian.Uint64(v) b.bitsRead = 0 - b.off -= 8 } // fill() will make sure at least 32 bits are available. @@ -91,25 +87,25 @@ func (b *bitReader) fill() { if b.bitsRead < 32 { return } - if b.off >= 4 { - v := b.in[b.off-4:] - v = v[:4] + if len(b.in) >= 4 { + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 return } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- + + b.bitsRead -= uint8(8 * len(b.in)) + for len(b.in) > 0 { + b.value = (b.value << 8) | uint64(b.in[len(b.in)-1]) + b.in = b.in[:len(b.in)-1] } } // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return len(b.in) == 0 && b.bitsRead >= 64 } // overread returns true if more bits have been requested than is on the stream. @@ -119,7 +115,7 @@ func (b *bitReader) overread() bool { // remain returns the number of bits remaining. 
func (b *bitReader) remain() uint { - return b.off*8 + 64 - uint(b.bitsRead) + return 8*uint(len(b.in)) + 64 - uint(b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index 78b3c61be3..1952f175b0 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 9f17ce601f..03744fbc76 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -554,6 +554,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { if debugDecoder { printf("Compression modes: 0b%b", compMode) } + if compMode&3 != 0 { + return errors.New("corrupt block: reserved bits not zero") + } for i := uint(0); i < 3; i++ { mode := seqCompMode((compMode >> (6 - i*2)) & 3) if debugDecoder { diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index fd4a36f730..32a7f401d5 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { if len(lits) >= 1024 { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(lits, b.litEnc) - } else if len(lits) > 32 { + } else if len(lits) > 16 { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(lits, b.litEnc) } else { err = huff0.ErrIncompressible } - + if err == nil && len(out)+5 > len(lits) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSizes(len(out), len(lits), single) + if len(out)+lh.size() >= len(lits) { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: if debugEncoder { @@ -420,6 +427,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { return nil } +// encodeRLE will encode an RLE block. +func (b *blockEnc) encodeRLE(val byte, length uint32) { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(length) + bh.setType(blockTypeRLE) + b.output = bh.appendTo(b.output) + b.output = append(b.output, val) +} + // fuzzFseEncoder can be used to fuzz the FSE encoder. func fuzzFseEncoder(data []byte) int { if len(data) > maxSequences || len(data) < 2 { @@ -472,6 +489,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.sequences) == 0 { return b.encodeLits(b.literals, rawAllLits) } + if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 { + // Check common RLE cases. + seq := b.sequences[0] + if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 { + // Offset == 1 and 0 or 1 literals. + b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen) + return nil + } + } + // We want some difference to at least account for the headers. 
saved := b.size - len(b.literals) - (b.size >> 6) if saved < 16 { @@ -503,7 +530,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.literals) >= 1024 && !raw { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - } else if len(b.literals) > 32 && !raw { + } else if len(b.literals) > 16 && !raw { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) @@ -511,6 +538,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { err = huff0.ErrIncompressible } + if err == nil && len(out)+5 > len(b.literals) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSize(len(b.literals)) + szRaw := lh.size() + lh.setSizes(len(out), len(b.literals), single) + szComp := lh.size() + if len(out)+szComp >= len(b.literals)+szRaw { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: lh.setType(literalsBlockRaw) @@ -773,10 +811,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { ml.flush(mlEnc.actualTableLog) of.flush(ofEnc.actualTableLog) ll.flush(llEnc.actualTableLog) - err = wr.close() - if err != nil { - return err - } + wr.close() b.output = wr.out // Maybe even add a bigger margin. diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index f6a240970d..6a5a2988b6 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -95,42 +95,54 @@ type Header struct { // If there isn't enough input, io.ErrUnexpectedEOF is returned. // The FirstBlock.OK will indicate if enough information was available to decode the first block header. func (h *Header) Decode(in []byte) error { + _, err := h.DecodeAndStrip(in) + return err +} + +// DecodeAndStrip will decode the header from the beginning of the stream +// and on success return the remaining bytes. +// This will decode the frame header and the first block header if enough bytes are provided. +// It is recommended to provide at least HeaderMaxSize bytes. +// If the frame header cannot be read an error will be returned. +// If there isn't enough input, io.ErrUnexpectedEOF is returned. +// The FirstBlock.OK will indicate if enough information was available to decode the first block header. 
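A small usage sketch of the new DecodeAndStrip API defined below, based only on the signature and the Header.HeaderSize field visible in this hunk; the writer setup and payload are illustrative, not part of the patch:

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Produce a small frame to inspect (illustrative setup).
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	frame := enc.EncodeAll([]byte("example payload"), nil)
	enc.Close()

	// DecodeAndStrip behaves like Decode, but additionally returns the
	// input with the parsed header bytes removed.
	var h zstd.Header
	remain, err := h.DecodeAndStrip(frame)
	if err != nil {
		panic(err)
	}
	fmt.Println("header size:", h.HeaderSize, "remaining bytes:", len(remain))
}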
+func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) { *h = Header{} if len(in) < 4 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } h.HeaderSize += 4 b, in := in[:4], in[4:] if string(b) != frameMagic { if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { - return ErrMagicMismatch + return nil, ErrMagicMismatch } if len(in) < 4 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } h.HeaderSize += 4 h.Skippable = true h.SkippableID = int(b[0] & 0xf) h.SkippableSize = binary.LittleEndian.Uint32(in) - return nil + return in[4:], nil } // Read Window_Descriptor // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if len(in) < 1 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } fhd, in := in[0], in[1:] h.HeaderSize++ h.SingleSegment = fhd&(1<<5) != 0 h.HasCheckSum = fhd&(1<<2) != 0 if fhd&(1<<3) != 0 { - return errors.New("reserved bit set on frame header") + return nil, errors.New("reserved bit set on frame header") } if !h.SingleSegment { if len(in) < 1 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } var wd byte wd, in = in[0], in[1:] @@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error { size = 4 } if len(in) < int(size) { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } b, in = in[:size], in[size:] h.HeaderSize += int(size) @@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error { if fcsSize > 0 { h.HasFCS = true if len(in) < fcsSize { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } b, in = in[:fcsSize], in[fcsSize:] h.HeaderSize += int(fcsSize) @@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error { // Frame Header done, we will not fail from now on. if len(in) < 3 { - return nil + return in, nil } tmp := in[:3] bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) @@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error { cSize := int(bh >> 3) switch blockType { case blockTypeReserved: - return nil + return in, nil case blockTypeRLE: h.FirstBlock.Compressed = true h.FirstBlock.DecompressedSize = cSize @@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error { } h.FirstBlock.OK = true - return nil + return in, nil +} + +// AppendTo will append the encoded header to the dst slice. +// There is no error checking performed on the header values. +func (h *Header) AppendTo(dst []byte) ([]byte, error) { + if h.Skippable { + magic := [4]byte{0x50, 0x2a, 0x4d, 0x18} + magic[0] |= byte(h.SkippableID & 0xf) + dst = append(dst, magic[:]...) + f := h.SkippableSize + return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil + } + f := frameHeader{ + ContentSize: h.FrameContentSize, + WindowSize: uint32(h.WindowSize), + SingleSegment: h.SingleSegment, + Checksum: h.HasCheckSum, + DictID: h.DictionaryID, + } + return f.appendTo(dst), nil } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index f04aaa21eb..bbca17234a 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -82,7 +82,7 @@ var ( // can run multiple concurrent stateless decodes. It is even possible to // use stateless decodes while a stream is being decoded. 
// -// The Reset function can be used to initiate a new stream, which is will considerably +// The Reset function can be used to initiate a new stream, which will considerably // reduce the allocations normally caused by NewReader. func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { initPredefined() diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index ca0951452e..b7b83164bc 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -1,10 +1,13 @@ package zstd import ( + "bytes" "encoding/binary" "errors" "fmt" "io" + "math" + "sort" "github.com/klauspost/compress/huff0" ) @@ -14,9 +17,8 @@ type dict struct { litEnc *huff0.Scratch llDec, ofDec, mlDec sequenceDec - //llEnc, ofEnc, mlEnc []*fseEncoder - offsets [3]int - content []byte + offsets [3]int + content []byte } const dictMagic = "\x37\xa4\x30\xec" @@ -159,3 +161,405 @@ func InspectDictionary(b []byte) (interface { d, err := loadDict(b) return d, err } + +type BuildDictOptions struct { + // Dictionary ID. + ID uint32 + + // Content to use to create dictionary tables. + Contents [][]byte + + // History to use for all blocks. + History []byte + + // Offsets to use. + Offsets [3]int + + // CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier. + // See https://github.com/facebook/zstd/issues/3724 + CompatV155 bool + + // Use the specified encoder level. + // The dictionary will be built using the specified encoder level, + // which will reflect speed and make the dictionary tailored for that level. + // If not set SpeedBestCompression will be used. + Level EncoderLevel + + // DebugOut will write stats and other details here if set. + DebugOut io.Writer +} + +func BuildDict(o BuildDictOptions) ([]byte, error) { + initPredefined() + hist := o.History + contents := o.Contents + debug := o.DebugOut != nil + println := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintln(o.DebugOut, args...) + } + } + printf := func(s string, args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintf(o.DebugOut, s, args...) + } + } + print := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprint(o.DebugOut, args...) 
+ } + } + + if int64(len(hist)) > dictMaxLength { + return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength)) + } + if len(hist) < 8 { + return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8) + } + if len(contents) == 0 { + return nil, errors.New("no content provided") + } + d := dict{ + id: o.ID, + litEnc: nil, + llDec: sequenceDec{}, + ofDec: sequenceDec{}, + mlDec: sequenceDec{}, + offsets: o.Offsets, + content: hist, + } + block := blockEnc{lowMem: false} + block.init() + enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}}) + if o.Level != 0 { + eOpts := encoderOptions{ + level: o.Level, + blockSize: maxMatchLen, + windowSize: maxMatchLen, + dict: &d, + lowMem: false, + } + enc = eOpts.encoder() + } else { + o.Level = SpeedBestCompression + } + var ( + remain [256]int + ll [256]int + ml [256]int + of [256]int + ) + addValues := func(dst *[256]int, src []byte) { + for _, v := range src { + dst[v]++ + } + } + addHist := func(dst *[256]int, src *[256]uint32) { + for i, v := range src { + dst[i] += int(v) + } + } + seqs := 0 + nUsed := 0 + litTotal := 0 + newOffsets := make(map[uint32]int, 1000) + for _, b := range contents { + block.reset(nil) + if len(b) < 8 { + continue + } + nUsed++ + enc.Reset(&d, true) + enc.Encode(&block, b) + addValues(&remain, block.literals) + litTotal += len(block.literals) + if len(block.sequences) == 0 { + continue + } + seqs += len(block.sequences) + block.genCodes() + addHist(&ll, block.coders.llEnc.Histogram()) + addHist(&ml, block.coders.mlEnc.Histogram()) + addHist(&of, block.coders.ofEnc.Histogram()) + for i, seq := range block.sequences { + if i > 3 { + break + } + offset := seq.offset + if offset == 0 { + continue + } + if int(offset) >= len(o.History) { + continue + } + if offset > 3 { + newOffsets[offset-3]++ + } else { + newOffsets[uint32(o.Offsets[offset-1])]++ + } + } + } + // Find most used offsets. + var sortedOffsets []uint32 + for k := range newOffsets { + sortedOffsets = append(sortedOffsets, k) + } + sort.Slice(sortedOffsets, func(i, j int) bool { + a, b := sortedOffsets[i], sortedOffsets[j] + if a == b { + // Prefer the longer offset + return sortedOffsets[i] > sortedOffsets[j] + } + return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]] + }) + if len(sortedOffsets) > 3 { + if debug { + print("Offsets:") + for i, v := range sortedOffsets { + if i > 20 { + break + } + printf("[%d: %d],", v, newOffsets[v]) + } + println("") + } + + sortedOffsets = sortedOffsets[:3] + } + for i, v := range sortedOffsets { + o.Offsets[i] = int(v) + } + if debug { + println("New repeat offsets", o.Offsets) + } + + if nUsed == 0 || seqs == 0 { + return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs) + } + if debug { + println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal) + } + if seqs/nUsed < 512 { + // Use 512 as minimum. + nUsed = seqs / 512 + if nUsed == 0 { + nUsed = 1 + } + } + copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { + hist := dst.Histogram() + var maxSym uint8 + var maxCount int + var fakeLength int + for i, v := range src { + if v > 0 { + v = v / nUsed + if v == 0 { + v = 1 + } + } + if v > maxCount { + maxCount = v + } + if v != 0 { + maxSym = uint8(i) + } + fakeLength += v + hist[i] = uint32(v) + } + + // Ensure we aren't trying to represent RLE. 
+ if maxCount == fakeLength { + for i := range hist { + if uint8(i) == maxSym { + fakeLength++ + maxSym++ + hist[i+1] = 1 + if maxSym > 1 { + break + } + } + if hist[0] == 0 { + fakeLength++ + hist[i] = 1 + if maxSym > 1 { + break + } + } + } + } + + dst.HistogramFinished(maxSym, maxCount) + dst.reUsed = false + dst.useRLE = false + err := dst.normalizeCount(fakeLength) + if err != nil { + return nil, err + } + if debug { + println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength) + } + return dst.writeCount(nil) + } + if debug { + print("Literal lengths: ") + } + llTable, err := copyHist(block.coders.llEnc, &ll) + if err != nil { + return nil, err + } + if debug { + print("Match lengths: ") + } + mlTable, err := copyHist(block.coders.mlEnc, &ml) + if err != nil { + return nil, err + } + if debug { + print("Offsets: ") + } + ofTable, err := copyHist(block.coders.ofEnc, &of) + if err != nil { + return nil, err + } + + // Literal table + avgSize := litTotal + if avgSize > huff0.BlockSizeMax/2 { + avgSize = huff0.BlockSizeMax / 2 + } + huffBuff := make([]byte, 0, avgSize) + // Target size + div := litTotal / avgSize + if div < 1 { + div = 1 + } + if debug { + println("Huffman weights:") + } + for i, n := range remain[:] { + if n > 0 { + n = n / div + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + if debug { + printf("[%d: %d], ", i, n) + } + } + } + if o.CompatV155 && remain[255]/div == 0 { + huffBuff = append(huffBuff, 255) + } + scratch := &huff0.Scratch{TableLog: 11} + for tries := 0; tries < 255; tries++ { + scratch = &huff0.Scratch{TableLog: 11} + _, _, err = huff0.Compress1X(huffBuff, scratch) + if err == nil { + break + } + if debug { + printf("Try %d: Huffman error: %v\n", tries+1, err) + } + huffBuff = huffBuff[:0] + if tries == 250 { + if debug { + println("Huffman: Bailing out with predefined table") + } + + // Bail out.... Just generate something + huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...) + for i := 0; i < 128; i++ { + huffBuff = append(huffBuff, byte(i)) + } + continue + } + if errors.Is(err, huff0.ErrIncompressible) { + // Try truncating least common. + for i, n := range remain[:] { + if n > 0 { + n = n / (div * (i + 1)) + if n > 0 { + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + } + } + } + if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 { + huffBuff = append(huffBuff, 255) + } + if len(huffBuff) == 0 { + huffBuff = append(huffBuff, 0, 255) + } + } + if errors.Is(err, huff0.ErrUseRLE) { + for i, n := range remain[:] { + n = n / (div * (i + 1)) + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) 
+ } + } + } + + var out bytes.Buffer + out.Write([]byte(dictMagic)) + out.Write(binary.LittleEndian.AppendUint32(nil, o.ID)) + out.Write(scratch.OutTable) + if debug { + println("huff table:", len(scratch.OutTable), "bytes") + println("of table:", len(ofTable), "bytes") + println("ml table:", len(mlTable), "bytes") + println("ll table:", len(llTable), "bytes") + } + out.Write(ofTable) + out.Write(mlTable) + out.Write(llTable) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2]))) + out.Write(hist) + if debug { + _, err := loadDict(out.Bytes()) + if err != nil { + panic(err) + } + i, err := InspectDictionary(out.Bytes()) + if err != nil { + panic(err) + } + println("ID:", i.ID()) + println("Content size:", i.ContentSize()) + println("Encoder:", i.LitEncoder() != nil) + println("Offsets:", i.Offsets()) + var totalSize int + for _, b := range contents { + totalSize += len(b) + } + + encWith := func(opts ...EOption) int { + enc, err := NewWriter(nil, opts...) + if err != nil { + panic(err) + } + defer enc.Close() + var dst []byte + var totalSize int + for _, b := range contents { + dst = enc.EncodeAll(b, dst[:0]) + totalSize += len(dst) + } + return totalSize + } + plain := encWith(WithEncoderLevel(o.Level)) + withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes())) + println("Input size:", totalSize) + println("Plain Compressed:", plain) + println("Dict Compressed:", withDict) + println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)") + } + return out.Bytes(), nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index 9819d41453..4613724e9d 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) { if m.rep < 0 { ofc = ofCode(uint32(m.s-m.offset) + 3) } else { - ofc = ofCode(uint32(m.rep)) + ofc = ofCode(uint32(m.rep) & 3) } // Cost, excluding ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] @@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { break } + // Add block to history s := e.addBlock(src) blk.size = len(src) + + // Check RLE first + if len(src) > zstdMinMatch { + ml := matchLen(src[1:], src) + if ml == len(src)-1 { + blk.literals = append(blk.literals, src[0]) + blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) + return + } + } + if len(src) < minNonLiteralBlockSize { blk.extraLits = len(src) blk.literals = blk.literals[:len(src)] @@ -197,17 +209,10 @@ encodeLoop: // Set m to a match at offset if it looks like that will improve compression. improve := func(m *match, offset int32, s int32, first uint32, rep int32) { - if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + delta := s - offset + if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first { return } - if debugAsserts { - if offset <= 0 { - panic(offset) - } - if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { - panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) - } - } // Try to quick reject if we already have a long match. 
if m.length > 16 { left := len(src) - int(m.s+m.length) @@ -226,8 +231,10 @@ encodeLoop: } } l := 4 + e.matchlen(s+4, offset+4, src) - if rep < 0 { + if m.rep <= 0 { // Extend candidate match backwards as far as possible. + // Do not extend repeats as we can assume they are optimal + // and offsets change if s == nextEmit. tMin := s - e.maxMatchOff if tMin < 0 { tMin = 0 @@ -238,7 +245,14 @@ encodeLoop: l++ } } - + if debugAsserts { + if offset >= s { + panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff)) + } + if !bytes.Equal(src[s:s+l], src[offset:offset+l]) { + panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) + } + } cand := match{offset: offset, s: s, length: l, rep: rep} cand.estBits(bitsPerByte) if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { @@ -281,6 +295,7 @@ encodeLoop: // Load next and check... e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} + index0 := s + 1 // Look far ahead, unless we have a really long match already... if best.length < goodEnough { @@ -334,41 +349,45 @@ encodeLoop: } if debugAsserts { + if best.offset >= best.s { + panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s)) + } + if best.s < nextEmit { + panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit)) + } + if best.offset < s-e.maxMatchOff { + panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff)) + } if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) } } // We have a match, we can store the forward value + s = best.s if best.rep > 0 { var seq seq seq.matchLen = uint32(best.length - zstdMinMatch) - if debugAsserts && s <= nextEmit { - panic("s <= nextEmit") - } addLiterals(&seq, best.s) // Repeat. If bit 4 is set, this is a non-lit repeat. seq.offset = uint32(best.rep & 3) if debugSequences { - println("repeat sequence", seq, "next s:", s) + println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset) } blk.sequences = append(blk.sequences, seq) // Index old s + 1 -> s - 1 - index0 := s + 1 s = best.s + best.length - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, best.length) - } - break encodeLoop - } + // Index skipped... + end := s + if s > sLimit+4 { + end = sLimit + 4 + } off := index0 + e.cur - for index0 < s { + for index0 < end { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) @@ -377,6 +396,7 @@ encodeLoop: off++ index0++ } + switch best.rep { case 2, 4 | 1: offset1, offset2 = offset2, offset1 @@ -385,13 +405,17 @@ encodeLoop: case 4 | 3: offset1, offset2, offset3 = offset1-1, offset1, offset2 } + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, best.length) + } + break encodeLoop + } continue } // A 4-byte match has been found. Update recent offsets. // We'll later see if more than 4 bytes. 
- index0 := s + 1 - s = best.s t := best.offset offset1, offset2, offset3 = s-t, offset1, offset2 @@ -418,19 +442,25 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) nextEmit = s - if s >= sLimit { - break encodeLoop + + // Index old s + 1 -> s - 1 or sLimit + end := s + if s > sLimit-4 { + end = sLimit - 4 } - // Index old s + 1 -> s - 1 - for index0 < s { + off := index0 + e.cur + for index0 < end { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} index0++ + off++ + } + if s >= sLimit { + break encodeLoop } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 8582f31a7c..a4f5bf91fc 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -102,9 +102,20 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { e.cur = e.maxMatchOff break } - + // Add block to history s := e.addBlock(src) blk.size = len(src) + + // Check RLE first + if len(src) > zstdMinMatch { + ml := matchLen(src[1:], src) + if ml == len(src)-1 { + blk.literals = append(blk.literals, src[0]) + blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) + return + } + } + if len(src) < minNonLiteralBlockSize { blk.extraLits = len(src) blk.literals = blk.literals[:len(src)] @@ -145,7 +156,7 @@ encodeLoop: var t int32 // We allow the encoder to optionally turn off repeat offsets across blocks canRepeat := len(blk.sequences) > 2 - var matched int32 + var matched, index0 int32 for { if debugAsserts && canRepeat && offset1 == 0 { @@ -162,6 +173,7 @@ encodeLoop: off := s + e.cur e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + index0 = s + 1 if canRepeat { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { @@ -258,7 +270,6 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - index0 := s + repOff2 s += lenght + repOff2 nextEmit = s if s >= sLimit { @@ -498,15 +509,15 @@ encodeLoop: } // Index match start+1 (long) -> s - 1 - index0 := s - l + 1 + off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 + off += 2 } cv = load6432(src, s) @@ -672,7 +683,7 @@ encodeLoop: var t int32 // We allow the encoder to optionally turn off repeat offsets across blocks canRepeat := len(blk.sequences) > 2 - var matched int32 + var matched, index0 int32 for { if debugAsserts && canRepeat && offset1 == 0 { @@ -691,6 +702,7 @@ encodeLoop: e.markLongShardDirty(nextHashL) e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} e.markShortShardDirty(nextHashS) + index0 = s + 1 if canRepeat { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { @@ -726,7 +738,6 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - index0 := s + repOff s += lenght + repOff nextEmit = s @@ -790,7 +801,6 @@ encodeLoop: } 
blk.sequences = append(blk.sequences, seq) - index0 := s + repOff2 s += lenght + repOff2 nextEmit = s if s >= sLimit { @@ -1024,18 +1034,18 @@ encodeLoop: } // Index match start+1 (long) -> s - 1 - index0 := s - l + 1 + off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.markLongShardDirty(h0) h1 := hashLen(cv1, betterShortTableBits, betterShortLen) e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.markShortShardDirty(h1) index0 += 2 + off += 2 } cv = load6432(src, s) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4de0aed0d0..72af7ef0fe 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error { DictID: e.o.dict.ID(), } - dst, err := fh.appendTo(tmp[:0]) - if err != nil { - return err - } + dst := fh.appendTo(tmp[:0]) s.headerWritten = true s.wWg.Wait() var n2 int @@ -483,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { Checksum: false, DictID: 0, } - dst, _ = fh.appendTo(dst) + dst = fh.appendTo(dst) // Write raw block as last one only. var blk blockHeader @@ -518,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } - dst, err := fh.appendTo(dst) - if err != nil { - panic(err) - } + dst = fh.appendTo(dst) // If we can do everything in one block, prefer that. if len(src) <= e.o.blockSize { @@ -581,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // Add padding with content from crypto/rand.Reader if e.o.pad > 0 { add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) + var err error dst, err = skippableFrame(dst, add, rand.Reader) if err != nil { panic(err) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index faaf81921c..20671dcb91 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption { // The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. -// The default value is determined by the compression level. +// The default value is determined by the compression level and max 8MB. 
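
With the level-derived default window now capped, callers that previously relied on `SpeedBestCompression` implying a 32MB window would need to request it explicitly. A minimal sketch, assuming only the documented constraint that the value be a power of two between `MinWindowSize` and `MaxWindowSize`:

```go
package main

import (
	"bytes"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var buf bytes.Buffer
	// Restore the old 32MB best-compression window explicitly; the value
	// must be a power of two in [MinWindowSize, MaxWindowSize].
	enc, err := zstd.NewWriter(&buf,
		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
		zstd.WithWindowSize(32<<20))
	if err != nil {
		panic(err)
	}
	if _, err := enc.Write([]byte("example payload")); err != nil {
		panic(err)
	}
	if err := enc.Close(); err != nil {
		panic(err)
	}
}
```
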
func WithWindowSize(n int) EOption { return func(o *encoderOptions) error { switch { @@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption { case SpeedDefault: o.windowSize = 8 << 20 case SpeedBetterCompression: - o.windowSize = 16 << 20 + o.windowSize = 8 << 20 case SpeedBestCompression: - o.windowSize = 32 << 20 + o.windowSize = 8 << 20 } } if !o.customALEntropy { diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go index 4ef7f5a3e3..667ca06794 100644 --- a/vendor/github.com/klauspost/compress/zstd/frameenc.go +++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go @@ -22,7 +22,7 @@ type frameHeader struct { const maxHeaderSize = 14 -func (f frameHeader) appendTo(dst []byte) ([]byte, error) { +func (f frameHeader) appendTo(dst []byte) []byte { dst = append(dst, frameMagic...) var fhd uint8 if f.Checksum { @@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { if f.SingleSegment { dst = append(dst, uint8(f.ContentSize)) } - // Unless SingleSegment is set, framessizes < 256 are nto stored. + // Unless SingleSegment is set, framessizes < 256 are not stored. case 1: f.ContentSize -= 256 dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8)) @@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { default: panic("invalid fcs") } - return dst, nil + return dst } const skippableFrameHeader = 4 + 4 diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go index 332e51fe44..8adfebb029 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go @@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error { if v == -1 { s.dt[highThreshold].setAddBits(uint8(i)) highThreshold-- - symbolNext[i] = 1 - } else { - symbolNext[i] = uint16(v) + v = 1 } + symbolNext[i] = uint16(v) } } @@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error { for ss, v := range s.norm[:s.symbolLen] { for i := 0; i < int(v); i++ { s.dt[position].setAddBits(uint8(ss)) - position = (position + step) & tableMask - for position > highThreshold { + for { // lowprob area position = (position + step) & tableMask + if position <= highThreshold { + break + } } } } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 17901e0804..ae7d4d3295 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -162,12 +162,12 @@ finalize: MOVD h, ret+24(FP) RET -// func writeBlocks(d *Digest, b []byte) int +// func writeBlocks(s *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. 
- MOVD d+0(FP), digest + MOVD s+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s index 9a7655c0f7..0782b86e3d 100644 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 9405fcf101..d7fe6d82d9 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return io.ErrUnexpectedEOF } var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) @@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) // extra bits are stored in reverse order. br.fill() - if s.maxBits <= 32 { - mo += br.getBits(moB) - ml += br.getBits(mlB) - ll += br.getBits(llB) - } else { - mo += br.getBits(moB) + mo += br.getBits(moB) + if s.maxBits > 32 { br.fill() - // matchlength+literal length, max 32 bits - ml += br.getBits(mlB) - ll += br.getBits(llB) - } + // matchlength+literal length, max 32 bits + ml += br.getBits(mlB) + ll += br.getBits(llB) mo = s.adjustOffset(mo, ll, moB) return } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index b6f4ba6fc5..5b06174b89 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -5,11 +5,11 @@ // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R14 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R14 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R14 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -301,9 +298,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 
32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -336,11 +333,11 @@ error_overread: // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R14 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R14 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R14 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -603,9 +597,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -638,11 +632,11 @@ error_overread: // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end: BZHIQ R14, R15, R15 // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, DI, DI - ADDQ CX, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R15, CX - MOVQ $0x00001010, R14 - BEXTRQ R14, SI, SI - ADDQ CX, SI + BZHIQ SI, R15, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -892,9 +883,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -927,11 +918,11 @@ error_overread: // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -1032,11 +1023,10 @@ 
sequenceDecs_decode_56_bmi2_fill_end: BZHIQ R14, R15, R15 // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, DI, DI - ADDQ CX, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R15, CX - MOVQ $0x00001010, R14 - BEXTRQ R14, SI, SI - ADDQ CX, SI + BZHIQ SI, R15, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -1152,9 +1140,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -1797,11 +1785,11 @@ empty_seqs: // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R13 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R13 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R13 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -2295,9 +2280,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -2362,11 +2347,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: BZHIQ R13, R14, R14 // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, DI, DI - ADDQ CX, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ 
ctx+16(FP), CX @@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R14, CX - MOVQ $0x00001010, R13 - BEXTRQ R13, SI, SI - ADDQ CX, SI + BZHIQ SI, R14, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -2818,9 +2800,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX @@ -2885,11 +2867,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R13 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R13 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R13 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3485,9 +3464,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -3552,11 +3531,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: BZHIQ R13, R14, R14 // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, DI, DI - ADDQ CX, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R14, CX - MOVQ $0x00001010, R13 - BEXTRQ R13, SI, SI - ADDQ CX, SI + BZHIQ SI, R14, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -4110,9 +4086,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go 
b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go index ac2a80d291..2fb35b788c 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { } for i := range seqs { var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 9e1baad73b..ec13594e89 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { var written int64 var readHeader bool { - var header []byte - var n int - header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + var n int n, r.err = w.Write(header) if r.err != nil { return written, r.err diff --git a/vendor/github.com/mattn/go-runewidth/.travis.yml b/vendor/github.com/mattn/go-runewidth/.travis.yml deleted file mode 100644 index 6a21813a3e..0000000000 --- a/vendor/github.com/mattn/go-runewidth/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: go -sudo: false -go: - - 1.13.x - - tip - -before_install: - - go get -t -v ./... - -script: - - go generate - - git diff --cached --exit-code - - ./go.test.sh - -after_success: - - bash <(curl -s https://codecov.io/bash) diff --git a/vendor/github.com/mattn/go-runewidth/README.md b/vendor/github.com/mattn/go-runewidth/README.md index aa56ab96c2..5e2cfd98cb 100644 --- a/vendor/github.com/mattn/go-runewidth/README.md +++ b/vendor/github.com/mattn/go-runewidth/README.md @@ -1,7 +1,7 @@ go-runewidth ============ -[![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth) +[![Build Status](https://github.com/mattn/go-runewidth/workflows/test/badge.svg?branch=master)](https://github.com/mattn/go-runewidth/actions?query=workflow%3Atest) [![Codecov](https://codecov.io/gh/mattn/go-runewidth/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-runewidth) [![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth) [![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth) diff --git a/vendor/github.com/mattn/go-runewidth/go.test.sh b/vendor/github.com/mattn/go-runewidth/go.test.sh deleted file mode 100644 index 012162b077..0000000000 --- a/vendor/github.com/mattn/go-runewidth/go.test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -set -e -echo "" > coverage.txt - -for d in $(go list ./... 
| grep -v vendor); do - go test -race -coverprofile=profile.out -covermode=atomic "$d" - if [ -f profile.out ]; then - cat profile.out >> coverage.txt - rm profile.out - fi -done diff --git a/vendor/github.com/mattn/go-runewidth/runewidth.go b/vendor/github.com/mattn/go-runewidth/runewidth.go index 3d7fa560b8..7dfbb3be91 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth.go @@ -2,6 +2,7 @@ package runewidth import ( "os" + "strings" "github.com/rivo/uniseg" ) @@ -34,7 +35,13 @@ func handleEnv() { EastAsianWidth = env == "1" } // update DefaultCondition - DefaultCondition.EastAsianWidth = EastAsianWidth + if DefaultCondition.EastAsianWidth != EastAsianWidth { + DefaultCondition.EastAsianWidth = EastAsianWidth + if len(DefaultCondition.combinedLut) > 0 { + DefaultCondition.combinedLut = DefaultCondition.combinedLut[:0] + CreateLUT() + } + } } type interval struct { @@ -89,6 +96,7 @@ var nonprint = table{ // Condition have flag EastAsianWidth whether the current locale is CJK or not. type Condition struct { + combinedLut []byte EastAsianWidth bool StrictEmojiNeutral bool } @@ -104,10 +112,16 @@ func NewCondition() *Condition { // RuneWidth returns the number of cells in r. // See http://www.unicode.org/reports/tr11/ func (c *Condition) RuneWidth(r rune) int { + if r < 0 || r > 0x10FFFF { + return 0 + } + if len(c.combinedLut) > 0 { + return int(c.combinedLut[r>>1]>>(uint(r&1)*4)) & 3 + } // optimized version, verified by TestRuneWidthChecksums() if !c.EastAsianWidth { switch { - case r < 0x20 || r > 0x10FFFF: + case r < 0x20: return 0 case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint return 0 @@ -124,7 +138,7 @@ func (c *Condition) RuneWidth(r rune) int { } } else { switch { - case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining): + case inTables(r, nonprint, combining): return 0 case inTable(r, narrow): return 1 @@ -138,6 +152,27 @@ func (c *Condition) RuneWidth(r rune) int { } } +// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation. +// This should not be called concurrently with other operations on c. +// If options in c is changed, CreateLUT should be called again. +func (c *Condition) CreateLUT() { + const max = 0x110000 + lut := c.combinedLut + if len(c.combinedLut) != 0 { + // Remove so we don't use it. + c.combinedLut = nil + } else { + lut = make([]byte, max/2) + } + for i := range lut { + i32 := int32(i * 2) + x0 := c.RuneWidth(i32) + x1 := c.RuneWidth(i32 + 1) + lut[i] = uint8(x0) | uint8(x1)<<4 + } + c.combinedLut = lut +} + // StringWidth return width as you can see func (c *Condition) StringWidth(s string) (width int) { g := uniseg.NewGraphemes(s) @@ -180,11 +215,47 @@ func (c *Condition) Truncate(s string, w int, tail string) string { return s[:pos] + tail } +// TruncateLeft cuts w cells from the beginning of the `s`. +func (c *Condition) TruncateLeft(s string, w int, prefix string) string { + if c.StringWidth(s) <= w { + return prefix + } + + var width int + pos := len(s) + + g := uniseg.NewGraphemes(s) + for g.Next() { + var chWidth int + for _, r := range g.Runes() { + chWidth = c.RuneWidth(r) + if chWidth > 0 { + break // See StringWidth() for details. 
+ } + } + + if width+chWidth > w { + if width < w { + _, pos = g.Positions() + prefix += strings.Repeat(" ", width+chWidth-w) + } else { + pos, _ = g.Positions() + } + + break + } + + width += chWidth + } + + return prefix + s[pos:] +} + // Wrap return string wrapped with w cells func (c *Condition) Wrap(s string, w int) string { width := 0 out := "" - for _, r := range []rune(s) { + for _, r := range s { cw := c.RuneWidth(r) if r == '\n' { out += string(r) @@ -257,6 +328,11 @@ func Truncate(s string, w int, tail string) string { return DefaultCondition.Truncate(s, w, tail) } +// TruncateLeft cuts w cells from the beginning of the `s`. +func TruncateLeft(s string, w int, prefix string) string { + return DefaultCondition.TruncateLeft(s, w, prefix) +} + // Wrap return string wrapped with w cells func Wrap(s string, w int) string { return DefaultCondition.Wrap(s, w) @@ -271,3 +347,12 @@ func FillLeft(s string, w int) string { func FillRight(s string, w int) string { return DefaultCondition.FillRight(s, w) } + +// CreateLUT will create an in-memory lookup table of 557055 bytes for faster operation. +// This should not be called concurrently with other operations. +func CreateLUT() { + if len(DefaultCondition.combinedLut) > 0 { + return + } + DefaultCondition.CreateLUT() +} diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go b/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go index 7d99f6e521..84b6528dfe 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_appengine.go @@ -1,3 +1,4 @@ +//go:build appengine // +build appengine package runewidth diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_js.go b/vendor/github.com/mattn/go-runewidth/runewidth_js.go index c5fdf40baa..c2abbc2db3 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_js.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_js.go @@ -1,5 +1,5 @@ -// +build js -// +build !appengine +//go:build js && !appengine +// +build js,!appengine package runewidth diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_posix.go b/vendor/github.com/mattn/go-runewidth/runewidth_posix.go index 480ad74853..5a31d738ec 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_posix.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_posix.go @@ -1,6 +1,5 @@ -// +build !windows -// +build !js -// +build !appengine +//go:build !windows && !js && !appengine +// +build !windows,!js,!appengine package runewidth diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_windows.go b/vendor/github.com/mattn/go-runewidth/runewidth_windows.go index d6a61777d7..5f987a310f 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_windows.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_windows.go @@ -1,5 +1,5 @@ -// +build windows -// +build !appengine +//go:build windows && !appengine +// +build windows,!appengine package runewidth diff --git a/vendor/github.com/rivo/uniseg/README.md b/vendor/github.com/rivo/uniseg/README.md index 89fc21a3df..25e9346874 100644 --- a/vendor/github.com/rivo/uniseg/README.md +++ b/vendor/github.com/rivo/uniseg/README.md @@ -3,13 +3,13 @@ [![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg) [![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg) -This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex 
#29](https://unicode.org/reports/tr29/) and Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0). +This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html). ## Background ### Grapheme Clusters -In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples: +In Go, [strings are read-only slices of bytes](https://go.dev/blog/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples: |String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters| |-|-|-|-| @@ -31,6 +31,10 @@ Sentence boundaries are often used for triple-click or some other method of sele Line breaking, also known as word wrapping, is the process of breaking a section of text into lines such that it will fit in the available width of a page, window or other display area. This package provides tools to determine where a string may or may not be broken and where it must be broken (for example after newline characters). +### Monospace Width + +Most terminals or text displays / text editors using a monospace font (for example source code editors) use a fixed width for each character. Some characters such as emojis or characters found in Asian and other languages may take up more than one character cell. This package provides tools to determine the number of cells a string will take up when displayed in a monospace font. See [here](https://pkg.go.dev/github.com/rivo/uniseg#hdr-Monospace_Width) for more information. + ## Installation ```bash @@ -47,6 +51,14 @@ fmt.Println(n) // 2 ``` +### Calculating the Monospace String Width + +```go +width := uniseg.StringWidth("🇩🇪🏳️‍🌈!") +fmt.Println(width) +// 5 +``` + ### Using the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) Class This is the most convenient method of iterating over grapheme clusters: @@ -121,6 +133,13 @@ Similarly, use - [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and - [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries). 
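
The monospace-width API introduced above pairs naturally with the existing cluster count; a minimal sketch using only the two functions the README documents (`GraphemeClusterCount`, `StringWidth`):

```go
package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	// Contrast user-perceived characters with terminal display cells.
	for _, s := range []string{"🇩🇪🏳️‍🌈!", "名前", "hello"} {
		fmt.Printf("%q: %d grapheme clusters, %d cells\n",
			s, uniseg.GraphemeClusterCount(s), uniseg.StringWidth(s))
	}
}
```
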
+Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString): + +```go +fmt.Println(uniseg.ReverseString("🇩🇪🏳️‍🌈")) +// 🏳️‍🌈🇩🇪 +``` + ## Documentation Refer to https://pkg.go.dev/github.com/rivo/uniseg for the package's documentation. diff --git a/vendor/github.com/rivo/uniseg/doc.go b/vendor/github.com/rivo/uniseg/doc.go index 6c498ede16..11224ae22d 100644 --- a/vendor/github.com/rivo/uniseg/doc.go +++ b/vendor/github.com/rivo/uniseg/doc.go @@ -1,8 +1,9 @@ /* -Package uniseg implements Unicode Text Segmentation and Unicode Line Breaking. -Unicode Text Segmentation conforms to Unicode Standard Annex #29 -(https://unicode.org/reports/tr29/) and Unicode Line Breaking conforms to -Unicode Standard Annex #14 (https://unicode.org/reports/tr14/). +Package uniseg implements Unicode Text Segmentation, Unicode Line Breaking, and +string width calculation for monospace fonts. Unicode Text Segmentation conforms +to Unicode Standard Annex #29 (https://unicode.org/reports/tr29/) and Unicode +Line Breaking conforms to Unicode Standard Annex #14 +(https://unicode.org/reports/tr14/). In short, using this package, you can split a string into grapheme clusters (what people would usually refer to as a "character"), into words, and into @@ -12,8 +13,23 @@ as emojis, combining characters, or characters from Asian, Arabic, Hebrew, or other languages. Additionally, you can use it to implement line breaking (or "word wrapping"), that is, to determine where text can be broken over to the next line when the width of the line is not big enough to fit the entire text. +Finally, you can use it to calculate the display width of a string for monospace +fonts. -Grapheme Clusters +# Getting Started + +If you just want to count the number of characters in a string, you can use +[GraphemeClusterCount]. If you want to determine the display width of a string, +you can use [StringWidth]. If you want to iterate over a string, you can use +[Step], [StepString], or the [Graphemes] class (more convenient but less +performant). This will provide you with all information: grapheme clusters, +word boundaries, sentence boundaries, line breaks, and monospace character +widths. The specialized functions [FirstGraphemeCluster], +[FirstGraphemeClusterInString], [FirstWord], [FirstWordInString], +[FirstSentence], and [FirstSentenceInString] can be used if only one type of +information is needed. + +# Grapheme Clusters Consider the rainbow flag emoji: 🏳️‍🌈. On most modern systems, it appears as one character. But its string representation actually has 14 bytes, so counting @@ -21,11 +37,11 @@ bytes (or using len("🏳️‍🌈")) will not work as expected. Counting runes either: The flag has 4 Unicode code points, thus 4 runes. The stdlib function utf8.RuneCountInString("🏳️‍🌈") and len([]rune("🏳️‍🌈")) will both return 4. -The uniseg.GraphemeClusterCount(str) function will return 1 for the rainbow flag -emoji. The Graphemes class and a variety of functions in this package will allow -you to split strings into its grapheme clusters. +The [GraphemeClusterCount] function will return 1 for the rainbow flag emoji. +The Graphemes class and a variety of functions in this package will allow you to +split strings into its grapheme clusters. -Word Boundaries +# Word Boundaries Word boundaries are used in a number of different contexts. 
The most familiar ones are selection (double-click mouse selection), cursor movement ("move to @@ -33,7 +49,7 @@ next word" control-arrow keys), and the dialog option "Whole Word Search" for search and replace. This package provides methods for determining word boundaries. -Sentence Boundaries +# Sentence Boundaries Sentence boundaries are often used for triple-click or some other method of selecting or iterating through blocks of text that are larger than single words. @@ -41,7 +57,7 @@ They are also used to determine whether words occur within the same sentence in database queries. This package provides methods for determining sentence boundaries. -Line Breaking +# Line Breaking Line breaking, also known as word wrapping, is the process of breaking a section of text into lines such that it will fit in the available width of a page, @@ -49,5 +65,44 @@ window or other display area. This package provides methods to determine the positions in a string where a line must be broken, may be broken, or must not be broken. +# Monospace Width + +Monospace width, as referred to in this package, is the width of a string in a +monospace font. This is commonly used in terminal user interfaces or text +displays or editors that don't support proportional fonts. A width of 1 +corresponds to a single character cell. The C function [wcswidth()] and its +implementation in other programming languages is in widespread use for the same +purpose. However, there is no standard for the calculation of such widths, and +this package differs from wcswidth() in a number of ways, presumably to generate +more visually pleasing results. + +To start, we assume that every code point has a width of 1, with the following +exceptions: + + - Code points with grapheme cluster break properties Control, CR, LF, Extend, + and ZWJ have a width of 0. + - U+2E3A, Two-Em Dash, has a width of 3. + - U+2E3B, Three-Em Dash, has a width of 4. + - Characters with the East-Asian Width properties "Fullwidth" (F) and "Wide" + (W) have a width of 2. (Properties "Ambiguous" (A) and "Neutral" (N) both + have a width of 1.) + - Code points with grapheme cluster break property Regional Indicator have a + width of 2. + - Code points with grapheme cluster break property Extended Pictographic have + a width of 2, unless their Emoji Presentation flag is "No", in which case + the width is 1. + +For Hangul grapheme clusters composed of conjoining Jamo and for Regional +Indicators (flags), all code points except the first one have a width of 0. For +grapheme clusters starting with an Extended Pictographic, any additional code +point will force a total width of 2, except if the Variation Selector-15 +(U+FE0E) is included, in which case the total width is always 1. Grapheme +clusters ending with Variation Selector-16 (U+FE0F) have a width of 2. + +Note that whether these widths appear correct depends on your application's +render engine, to which extent it conforms to the Unicode Standard, and its +choice of font. + +[wcswidth()]: https://man7.org/linux/man-pages/man3/wcswidth.3.html */ package uniseg diff --git a/vendor/github.com/rivo/uniseg/eastasianwidth.go b/vendor/github.com/rivo/uniseg/eastasianwidth.go index 456c1cac59..661934ac2d 100644 --- a/vendor/github.com/rivo/uniseg/eastasianwidth.go +++ b/vendor/github.com/rivo/uniseg/eastasianwidth.go @@ -4,7 +4,10 @@ package uniseg // eastAsianWidth are taken from // https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt -// on July 25, 2022. 
See https://www.unicode.org/license.html for the Unicode +// and +// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// ("Extended_Pictographic" only) +// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // license agreement. var eastAsianWidth = [][3]int{ {0x0000, 0x001F, prN}, // Cc [32] .. diff --git a/vendor/github.com/rivo/uniseg/emojipresentation.go b/vendor/github.com/rivo/uniseg/emojipresentation.go new file mode 100644 index 0000000000..fd0f7451af --- /dev/null +++ b/vendor/github.com/rivo/uniseg/emojipresentation.go @@ -0,0 +1,285 @@ +package uniseg + +// Code generated via go generate from gen_properties.go. DO NOT EDIT. + +// emojiPresentation are taken from +// +// and +// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// ("Extended_Pictographic" only) +// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// license agreement. +var emojiPresentation = [][3]int{ + {0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done + {0x23E9, 0x23EC, prEmojiPresentation}, // E0.6 [4] (⏩..⏬) fast-forward button..fast down button + {0x23F0, 0x23F0, prEmojiPresentation}, // E0.6 [1] (⏰) alarm clock + {0x23F3, 0x23F3, prEmojiPresentation}, // E0.6 [1] (⏳) hourglass not done + {0x25FD, 0x25FE, prEmojiPresentation}, // E0.6 [2] (◽..◾) white medium-small square..black medium-small square + {0x2614, 0x2615, prEmojiPresentation}, // E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage + {0x2648, 0x2653, prEmojiPresentation}, // E0.6 [12] (♈..♓) Aries..Pisces + {0x267F, 0x267F, prEmojiPresentation}, // E0.6 [1] (♿) wheelchair symbol + {0x2693, 0x2693, prEmojiPresentation}, // E0.6 [1] (⚓) anchor + {0x26A1, 0x26A1, prEmojiPresentation}, // E0.6 [1] (⚡) high voltage + {0x26AA, 0x26AB, prEmojiPresentation}, // E0.6 [2] (⚪..⚫) white circle..black circle + {0x26BD, 0x26BE, prEmojiPresentation}, // E0.6 [2] (⚽..⚾) soccer ball..baseball + {0x26C4, 0x26C5, prEmojiPresentation}, // E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud + {0x26CE, 0x26CE, prEmojiPresentation}, // E0.6 [1] (⛎) Ophiuchus + {0x26D4, 0x26D4, prEmojiPresentation}, // E0.6 [1] (⛔) no entry + {0x26EA, 0x26EA, prEmojiPresentation}, // E0.6 [1] (⛪) church + {0x26F2, 0x26F3, prEmojiPresentation}, // E0.6 [2] (⛲..⛳) fountain..flag in hole + {0x26F5, 0x26F5, prEmojiPresentation}, // E0.6 [1] (⛵) sailboat + {0x26FA, 0x26FA, prEmojiPresentation}, // E0.6 [1] (⛺) tent + {0x26FD, 0x26FD, prEmojiPresentation}, // E0.6 [1] (⛽) fuel pump + {0x2705, 0x2705, prEmojiPresentation}, // E0.6 [1] (✅) check mark button + {0x270A, 0x270B, prEmojiPresentation}, // E0.6 [2] (✊..✋) raised fist..raised hand + {0x2728, 0x2728, prEmojiPresentation}, // E0.6 [1] (✨) sparkles + {0x274C, 0x274C, prEmojiPresentation}, // E0.6 [1] (❌) cross mark + {0x274E, 0x274E, prEmojiPresentation}, // E0.6 [1] (❎) cross mark button + {0x2753, 0x2755, prEmojiPresentation}, // E0.6 [3] (❓..❕) red question mark..white exclamation mark + {0x2757, 0x2757, prEmojiPresentation}, // E0.6 [1] (❗) red exclamation mark + {0x2795, 0x2797, prEmojiPresentation}, // E0.6 [3] (➕..➗) plus..divide + {0x27B0, 0x27B0, prEmojiPresentation}, // E0.6 [1] (➰) curly loop + {0x27BF, 0x27BF, prEmojiPresentation}, // E1.0 [1] (➿) double curly loop + {0x2B1B, 0x2B1C, prEmojiPresentation}, // E0.6 [2] (⬛..⬜) black large square..white large square + {0x2B50, 0x2B50, prEmojiPresentation}, // E0.6 [1] (⭐) star + {0x2B55, 0x2B55, prEmojiPresentation}, // E0.6 [1] (⭕) hollow red circle + {0x1F004, 
0x1F004, prEmojiPresentation}, // E0.6 [1] (🀄) mahjong red dragon + {0x1F0CF, 0x1F0CF, prEmojiPresentation}, // E0.6 [1] (🃏) joker + {0x1F18E, 0x1F18E, prEmojiPresentation}, // E0.6 [1] (🆎) AB button (blood type) + {0x1F191, 0x1F19A, prEmojiPresentation}, // E0.6 [10] (🆑..🆚) CL button..VS button + {0x1F1E6, 0x1F1FF, prEmojiPresentation}, // E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z + {0x1F201, 0x1F201, prEmojiPresentation}, // E0.6 [1] (🈁) Japanese “here” button + {0x1F21A, 0x1F21A, prEmojiPresentation}, // E0.6 [1] (🈚) Japanese “free of charge” button + {0x1F22F, 0x1F22F, prEmojiPresentation}, // E0.6 [1] (🈯) Japanese “reserved” button + {0x1F232, 0x1F236, prEmojiPresentation}, // E0.6 [5] (🈲..🈶) Japanese “prohibited” button..Japanese “not free of charge” button + {0x1F238, 0x1F23A, prEmojiPresentation}, // E0.6 [3] (🈸..🈺) Japanese “application” button..Japanese “open for business” button + {0x1F250, 0x1F251, prEmojiPresentation}, // E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button + {0x1F300, 0x1F30C, prEmojiPresentation}, // E0.6 [13] (🌀..🌌) cyclone..milky way + {0x1F30D, 0x1F30E, prEmojiPresentation}, // E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas + {0x1F30F, 0x1F30F, prEmojiPresentation}, // E0.6 [1] (🌏) globe showing Asia-Australia + {0x1F310, 0x1F310, prEmojiPresentation}, // E1.0 [1] (🌐) globe with meridians + {0x1F311, 0x1F311, prEmojiPresentation}, // E0.6 [1] (🌑) new moon + {0x1F312, 0x1F312, prEmojiPresentation}, // E1.0 [1] (🌒) waxing crescent moon + {0x1F313, 0x1F315, prEmojiPresentation}, // E0.6 [3] (🌓..🌕) first quarter moon..full moon + {0x1F316, 0x1F318, prEmojiPresentation}, // E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon + {0x1F319, 0x1F319, prEmojiPresentation}, // E0.6 [1] (🌙) crescent moon + {0x1F31A, 0x1F31A, prEmojiPresentation}, // E1.0 [1] (🌚) new moon face + {0x1F31B, 0x1F31B, prEmojiPresentation}, // E0.6 [1] (🌛) first quarter moon face + {0x1F31C, 0x1F31C, prEmojiPresentation}, // E0.7 [1] (🌜) last quarter moon face + {0x1F31D, 0x1F31E, prEmojiPresentation}, // E1.0 [2] (🌝..🌞) full moon face..sun with face + {0x1F31F, 0x1F320, prEmojiPresentation}, // E0.6 [2] (🌟..🌠) glowing star..shooting star + {0x1F32D, 0x1F32F, prEmojiPresentation}, // E1.0 [3] (🌭..🌯) hot dog..burrito + {0x1F330, 0x1F331, prEmojiPresentation}, // E0.6 [2] (🌰..🌱) chestnut..seedling + {0x1F332, 0x1F333, prEmojiPresentation}, // E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree + {0x1F334, 0x1F335, prEmojiPresentation}, // E0.6 [2] (🌴..🌵) palm tree..cactus + {0x1F337, 0x1F34A, prEmojiPresentation}, // E0.6 [20] (🌷..🍊) tulip..tangerine + {0x1F34B, 0x1F34B, prEmojiPresentation}, // E1.0 [1] (🍋) lemon + {0x1F34C, 0x1F34F, prEmojiPresentation}, // E0.6 [4] (🍌..🍏) banana..green apple + {0x1F350, 0x1F350, prEmojiPresentation}, // E1.0 [1] (🍐) pear + {0x1F351, 0x1F37B, prEmojiPresentation}, // E0.6 [43] (🍑..🍻) peach..clinking beer mugs + {0x1F37C, 0x1F37C, prEmojiPresentation}, // E1.0 [1] (🍼) baby bottle + {0x1F37E, 0x1F37F, prEmojiPresentation}, // E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn + {0x1F380, 0x1F393, prEmojiPresentation}, // E0.6 [20] (🎀..🎓) ribbon..graduation cap + {0x1F3A0, 0x1F3C4, prEmojiPresentation}, // E0.6 [37] (🎠..🏄) carousel horse..person surfing + {0x1F3C5, 0x1F3C5, prEmojiPresentation}, // E1.0 [1] (🏅) sports medal + {0x1F3C6, 0x1F3C6, prEmojiPresentation}, // E0.6 [1] (🏆) trophy + {0x1F3C7, 0x1F3C7, prEmojiPresentation}, // E1.0 [1] (🏇) horse racing + {0x1F3C8, 
0x1F3C8, prEmojiPresentation}, // E0.6 [1] (🏈) american football + {0x1F3C9, 0x1F3C9, prEmojiPresentation}, // E1.0 [1] (🏉) rugby football + {0x1F3CA, 0x1F3CA, prEmojiPresentation}, // E0.6 [1] (🏊) person swimming + {0x1F3CF, 0x1F3D3, prEmojiPresentation}, // E1.0 [5] (🏏..🏓) cricket game..ping pong + {0x1F3E0, 0x1F3E3, prEmojiPresentation}, // E0.6 [4] (🏠..🏣) house..Japanese post office + {0x1F3E4, 0x1F3E4, prEmojiPresentation}, // E1.0 [1] (🏤) post office + {0x1F3E5, 0x1F3F0, prEmojiPresentation}, // E0.6 [12] (🏥..🏰) hospital..castle + {0x1F3F4, 0x1F3F4, prEmojiPresentation}, // E1.0 [1] (🏴) black flag + {0x1F3F8, 0x1F407, prEmojiPresentation}, // E1.0 [16] (🏸..🐇) badminton..rabbit + {0x1F408, 0x1F408, prEmojiPresentation}, // E0.7 [1] (🐈) cat + {0x1F409, 0x1F40B, prEmojiPresentation}, // E1.0 [3] (🐉..🐋) dragon..whale + {0x1F40C, 0x1F40E, prEmojiPresentation}, // E0.6 [3] (🐌..🐎) snail..horse + {0x1F40F, 0x1F410, prEmojiPresentation}, // E1.0 [2] (🐏..🐐) ram..goat + {0x1F411, 0x1F412, prEmojiPresentation}, // E0.6 [2] (🐑..🐒) ewe..monkey + {0x1F413, 0x1F413, prEmojiPresentation}, // E1.0 [1] (🐓) rooster + {0x1F414, 0x1F414, prEmojiPresentation}, // E0.6 [1] (🐔) chicken + {0x1F415, 0x1F415, prEmojiPresentation}, // E0.7 [1] (🐕) dog + {0x1F416, 0x1F416, prEmojiPresentation}, // E1.0 [1] (🐖) pig + {0x1F417, 0x1F429, prEmojiPresentation}, // E0.6 [19] (🐗..🐩) boar..poodle + {0x1F42A, 0x1F42A, prEmojiPresentation}, // E1.0 [1] (🐪) camel + {0x1F42B, 0x1F43E, prEmojiPresentation}, // E0.6 [20] (🐫..🐾) two-hump camel..paw prints + {0x1F440, 0x1F440, prEmojiPresentation}, // E0.6 [1] (👀) eyes + {0x1F442, 0x1F464, prEmojiPresentation}, // E0.6 [35] (👂..👤) ear..bust in silhouette + {0x1F465, 0x1F465, prEmojiPresentation}, // E1.0 [1] (👥) busts in silhouette + {0x1F466, 0x1F46B, prEmojiPresentation}, // E0.6 [6] (👦..👫) boy..woman and man holding hands + {0x1F46C, 0x1F46D, prEmojiPresentation}, // E1.0 [2] (👬..👭) men holding hands..women holding hands + {0x1F46E, 0x1F4AC, prEmojiPresentation}, // E0.6 [63] (👮..💬) police officer..speech balloon + {0x1F4AD, 0x1F4AD, prEmojiPresentation}, // E1.0 [1] (💭) thought balloon + {0x1F4AE, 0x1F4B5, prEmojiPresentation}, // E0.6 [8] (💮..💵) white flower..dollar banknote + {0x1F4B6, 0x1F4B7, prEmojiPresentation}, // E1.0 [2] (💶..💷) euro banknote..pound banknote + {0x1F4B8, 0x1F4EB, prEmojiPresentation}, // E0.6 [52] (💸..📫) money with wings..closed mailbox with raised flag + {0x1F4EC, 0x1F4ED, prEmojiPresentation}, // E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag + {0x1F4EE, 0x1F4EE, prEmojiPresentation}, // E0.6 [1] (📮) postbox + {0x1F4EF, 0x1F4EF, prEmojiPresentation}, // E1.0 [1] (📯) postal horn + {0x1F4F0, 0x1F4F4, prEmojiPresentation}, // E0.6 [5] (📰..📴) newspaper..mobile phone off + {0x1F4F5, 0x1F4F5, prEmojiPresentation}, // E1.0 [1] (📵) no mobile phones + {0x1F4F6, 0x1F4F7, prEmojiPresentation}, // E0.6 [2] (📶..📷) antenna bars..camera + {0x1F4F8, 0x1F4F8, prEmojiPresentation}, // E1.0 [1] (📸) camera with flash + {0x1F4F9, 0x1F4FC, prEmojiPresentation}, // E0.6 [4] (📹..📼) video camera..videocassette + {0x1F4FF, 0x1F502, prEmojiPresentation}, // E1.0 [4] (📿..🔂) prayer beads..repeat single button + {0x1F503, 0x1F503, prEmojiPresentation}, // E0.6 [1] (🔃) clockwise vertical arrows + {0x1F504, 0x1F507, prEmojiPresentation}, // E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker + {0x1F508, 0x1F508, prEmojiPresentation}, // E0.7 [1] (🔈) speaker low volume + {0x1F509, 0x1F509, prEmojiPresentation}, // E1.0 [1] (🔉) speaker medium 
volume + {0x1F50A, 0x1F514, prEmojiPresentation}, // E0.6 [11] (🔊..🔔) speaker high volume..bell + {0x1F515, 0x1F515, prEmojiPresentation}, // E1.0 [1] (🔕) bell with slash + {0x1F516, 0x1F52B, prEmojiPresentation}, // E0.6 [22] (🔖..🔫) bookmark..water pistol + {0x1F52C, 0x1F52D, prEmojiPresentation}, // E1.0 [2] (🔬..🔭) microscope..telescope + {0x1F52E, 0x1F53D, prEmojiPresentation}, // E0.6 [16] (🔮..🔽) crystal ball..downwards button + {0x1F54B, 0x1F54E, prEmojiPresentation}, // E1.0 [4] (🕋..🕎) kaaba..menorah + {0x1F550, 0x1F55B, prEmojiPresentation}, // E0.6 [12] (🕐..🕛) one o’clock..twelve o’clock + {0x1F55C, 0x1F567, prEmojiPresentation}, // E0.7 [12] (🕜..🕧) one-thirty..twelve-thirty + {0x1F57A, 0x1F57A, prEmojiPresentation}, // E3.0 [1] (🕺) man dancing + {0x1F595, 0x1F596, prEmojiPresentation}, // E1.0 [2] (🖕..🖖) middle finger..vulcan salute + {0x1F5A4, 0x1F5A4, prEmojiPresentation}, // E3.0 [1] (🖤) black heart + {0x1F5FB, 0x1F5FF, prEmojiPresentation}, // E0.6 [5] (🗻..🗿) mount fuji..moai + {0x1F600, 0x1F600, prEmojiPresentation}, // E1.0 [1] (😀) grinning face + {0x1F601, 0x1F606, prEmojiPresentation}, // E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face + {0x1F607, 0x1F608, prEmojiPresentation}, // E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns + {0x1F609, 0x1F60D, prEmojiPresentation}, // E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes + {0x1F60E, 0x1F60E, prEmojiPresentation}, // E1.0 [1] (😎) smiling face with sunglasses + {0x1F60F, 0x1F60F, prEmojiPresentation}, // E0.6 [1] (😏) smirking face + {0x1F610, 0x1F610, prEmojiPresentation}, // E0.7 [1] (😐) neutral face + {0x1F611, 0x1F611, prEmojiPresentation}, // E1.0 [1] (😑) expressionless face + {0x1F612, 0x1F614, prEmojiPresentation}, // E0.6 [3] (😒..😔) unamused face..pensive face + {0x1F615, 0x1F615, prEmojiPresentation}, // E1.0 [1] (😕) confused face + {0x1F616, 0x1F616, prEmojiPresentation}, // E0.6 [1] (😖) confounded face + {0x1F617, 0x1F617, prEmojiPresentation}, // E1.0 [1] (😗) kissing face + {0x1F618, 0x1F618, prEmojiPresentation}, // E0.6 [1] (😘) face blowing a kiss + {0x1F619, 0x1F619, prEmojiPresentation}, // E1.0 [1] (😙) kissing face with smiling eyes + {0x1F61A, 0x1F61A, prEmojiPresentation}, // E0.6 [1] (😚) kissing face with closed eyes + {0x1F61B, 0x1F61B, prEmojiPresentation}, // E1.0 [1] (😛) face with tongue + {0x1F61C, 0x1F61E, prEmojiPresentation}, // E0.6 [3] (😜..😞) winking face with tongue..disappointed face + {0x1F61F, 0x1F61F, prEmojiPresentation}, // E1.0 [1] (😟) worried face + {0x1F620, 0x1F625, prEmojiPresentation}, // E0.6 [6] (😠..😥) angry face..sad but relieved face + {0x1F626, 0x1F627, prEmojiPresentation}, // E1.0 [2] (😦..😧) frowning face with open mouth..anguished face + {0x1F628, 0x1F62B, prEmojiPresentation}, // E0.6 [4] (😨..😫) fearful face..tired face + {0x1F62C, 0x1F62C, prEmojiPresentation}, // E1.0 [1] (😬) grimacing face + {0x1F62D, 0x1F62D, prEmojiPresentation}, // E0.6 [1] (😭) loudly crying face + {0x1F62E, 0x1F62F, prEmojiPresentation}, // E1.0 [2] (😮..😯) face with open mouth..hushed face + {0x1F630, 0x1F633, prEmojiPresentation}, // E0.6 [4] (😰..😳) anxious face with sweat..flushed face + {0x1F634, 0x1F634, prEmojiPresentation}, // E1.0 [1] (😴) sleeping face + {0x1F635, 0x1F635, prEmojiPresentation}, // E0.6 [1] (😵) face with crossed-out eyes + {0x1F636, 0x1F636, prEmojiPresentation}, // E1.0 [1] (😶) face without mouth + {0x1F637, 0x1F640, prEmojiPresentation}, // E0.6 [10] (😷..🙀) face with medical mask..weary cat + {0x1F641, 0x1F644, 
prEmojiPresentation}, // E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes + {0x1F645, 0x1F64F, prEmojiPresentation}, // E0.6 [11] (🙅..🙏) person gesturing NO..folded hands + {0x1F680, 0x1F680, prEmojiPresentation}, // E0.6 [1] (🚀) rocket + {0x1F681, 0x1F682, prEmojiPresentation}, // E1.0 [2] (🚁..🚂) helicopter..locomotive + {0x1F683, 0x1F685, prEmojiPresentation}, // E0.6 [3] (🚃..🚅) railway car..bullet train + {0x1F686, 0x1F686, prEmojiPresentation}, // E1.0 [1] (🚆) train + {0x1F687, 0x1F687, prEmojiPresentation}, // E0.6 [1] (🚇) metro + {0x1F688, 0x1F688, prEmojiPresentation}, // E1.0 [1] (🚈) light rail + {0x1F689, 0x1F689, prEmojiPresentation}, // E0.6 [1] (🚉) station + {0x1F68A, 0x1F68B, prEmojiPresentation}, // E1.0 [2] (🚊..🚋) tram..tram car + {0x1F68C, 0x1F68C, prEmojiPresentation}, // E0.6 [1] (🚌) bus + {0x1F68D, 0x1F68D, prEmojiPresentation}, // E0.7 [1] (🚍) oncoming bus + {0x1F68E, 0x1F68E, prEmojiPresentation}, // E1.0 [1] (🚎) trolleybus + {0x1F68F, 0x1F68F, prEmojiPresentation}, // E0.6 [1] (🚏) bus stop + {0x1F690, 0x1F690, prEmojiPresentation}, // E1.0 [1] (🚐) minibus + {0x1F691, 0x1F693, prEmojiPresentation}, // E0.6 [3] (🚑..🚓) ambulance..police car + {0x1F694, 0x1F694, prEmojiPresentation}, // E0.7 [1] (🚔) oncoming police car + {0x1F695, 0x1F695, prEmojiPresentation}, // E0.6 [1] (🚕) taxi + {0x1F696, 0x1F696, prEmojiPresentation}, // E1.0 [1] (🚖) oncoming taxi + {0x1F697, 0x1F697, prEmojiPresentation}, // E0.6 [1] (🚗) automobile + {0x1F698, 0x1F698, prEmojiPresentation}, // E0.7 [1] (🚘) oncoming automobile + {0x1F699, 0x1F69A, prEmojiPresentation}, // E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck + {0x1F69B, 0x1F6A1, prEmojiPresentation}, // E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway + {0x1F6A2, 0x1F6A2, prEmojiPresentation}, // E0.6 [1] (🚢) ship + {0x1F6A3, 0x1F6A3, prEmojiPresentation}, // E1.0 [1] (🚣) person rowing boat + {0x1F6A4, 0x1F6A5, prEmojiPresentation}, // E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light + {0x1F6A6, 0x1F6A6, prEmojiPresentation}, // E1.0 [1] (🚦) vertical traffic light + {0x1F6A7, 0x1F6AD, prEmojiPresentation}, // E0.6 [7] (🚧..🚭) construction..no smoking + {0x1F6AE, 0x1F6B1, prEmojiPresentation}, // E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water + {0x1F6B2, 0x1F6B2, prEmojiPresentation}, // E0.6 [1] (🚲) bicycle + {0x1F6B3, 0x1F6B5, prEmojiPresentation}, // E1.0 [3] (🚳..🚵) no bicycles..person mountain biking + {0x1F6B6, 0x1F6B6, prEmojiPresentation}, // E0.6 [1] (🚶) person walking + {0x1F6B7, 0x1F6B8, prEmojiPresentation}, // E1.0 [2] (🚷..🚸) no pedestrians..children crossing + {0x1F6B9, 0x1F6BE, prEmojiPresentation}, // E0.6 [6] (🚹..🚾) men’s room..water closet + {0x1F6BF, 0x1F6BF, prEmojiPresentation}, // E1.0 [1] (🚿) shower + {0x1F6C0, 0x1F6C0, prEmojiPresentation}, // E0.6 [1] (🛀) person taking bath + {0x1F6C1, 0x1F6C5, prEmojiPresentation}, // E1.0 [5] (🛁..🛅) bathtub..left luggage + {0x1F6CC, 0x1F6CC, prEmojiPresentation}, // E1.0 [1] (🛌) person in bed + {0x1F6D0, 0x1F6D0, prEmojiPresentation}, // E1.0 [1] (🛐) place of worship + {0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart + {0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple + {0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator + {0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy + {0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival + {0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick 
scooter..canoe + {0x1F6F7, 0x1F6F8, prEmojiPresentation}, // E5.0 [2] (🛷..🛸) sled..flying saucer + {0x1F6F9, 0x1F6F9, prEmojiPresentation}, // E11.0 [1] (🛹) skateboard + {0x1F6FA, 0x1F6FA, prEmojiPresentation}, // E12.0 [1] (🛺) auto rickshaw + {0x1F6FB, 0x1F6FC, prEmojiPresentation}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate + {0x1F7E0, 0x1F7EB, prEmojiPresentation}, // E12.0 [12] (🟠..🟫) orange circle..brown square + {0x1F7F0, 0x1F7F0, prEmojiPresentation}, // E14.0 [1] (🟰) heavy equals sign + {0x1F90C, 0x1F90C, prEmojiPresentation}, // E13.0 [1] (🤌) pinched fingers + {0x1F90D, 0x1F90F, prEmojiPresentation}, // E12.0 [3] (🤍..🤏) white heart..pinching hand + {0x1F910, 0x1F918, prEmojiPresentation}, // E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns + {0x1F919, 0x1F91E, prEmojiPresentation}, // E3.0 [6] (🤙..🤞) call me hand..crossed fingers + {0x1F91F, 0x1F91F, prEmojiPresentation}, // E5.0 [1] (🤟) love-you gesture + {0x1F920, 0x1F927, prEmojiPresentation}, // E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face + {0x1F928, 0x1F92F, prEmojiPresentation}, // E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head + {0x1F930, 0x1F930, prEmojiPresentation}, // E3.0 [1] (🤰) pregnant woman + {0x1F931, 0x1F932, prEmojiPresentation}, // E5.0 [2] (🤱..🤲) breast-feeding..palms up together + {0x1F933, 0x1F93A, prEmojiPresentation}, // E3.0 [8] (🤳..🤺) selfie..person fencing + {0x1F93C, 0x1F93E, prEmojiPresentation}, // E3.0 [3] (🤼..🤾) people wrestling..person playing handball + {0x1F93F, 0x1F93F, prEmojiPresentation}, // E12.0 [1] (🤿) diving mask + {0x1F940, 0x1F945, prEmojiPresentation}, // E3.0 [6] (🥀..🥅) wilted flower..goal net + {0x1F947, 0x1F94B, prEmojiPresentation}, // E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform + {0x1F94C, 0x1F94C, prEmojiPresentation}, // E5.0 [1] (🥌) curling stone + {0x1F94D, 0x1F94F, prEmojiPresentation}, // E11.0 [3] (🥍..🥏) lacrosse..flying disc + {0x1F950, 0x1F95E, prEmojiPresentation}, // E3.0 [15] (🥐..🥞) croissant..pancakes + {0x1F95F, 0x1F96B, prEmojiPresentation}, // E5.0 [13] (🥟..🥫) dumpling..canned food + {0x1F96C, 0x1F970, prEmojiPresentation}, // E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts + {0x1F971, 0x1F971, prEmojiPresentation}, // E12.0 [1] (🥱) yawning face + {0x1F972, 0x1F972, prEmojiPresentation}, // E13.0 [1] (🥲) smiling face with tear + {0x1F973, 0x1F976, prEmojiPresentation}, // E11.0 [4] (🥳..🥶) partying face..cold face + {0x1F977, 0x1F978, prEmojiPresentation}, // E13.0 [2] (🥷..🥸) ninja..disguised face + {0x1F979, 0x1F979, prEmojiPresentation}, // E14.0 [1] (🥹) face holding back tears + {0x1F97A, 0x1F97A, prEmojiPresentation}, // E11.0 [1] (🥺) pleading face + {0x1F97B, 0x1F97B, prEmojiPresentation}, // E12.0 [1] (🥻) sari + {0x1F97C, 0x1F97F, prEmojiPresentation}, // E11.0 [4] (🥼..🥿) lab coat..flat shoe + {0x1F980, 0x1F984, prEmojiPresentation}, // E1.0 [5] (🦀..🦄) crab..unicorn + {0x1F985, 0x1F991, prEmojiPresentation}, // E3.0 [13] (🦅..🦑) eagle..squid + {0x1F992, 0x1F997, prEmojiPresentation}, // E5.0 [6] (🦒..🦗) giraffe..cricket + {0x1F998, 0x1F9A2, prEmojiPresentation}, // E11.0 [11] (🦘..🦢) kangaroo..swan + {0x1F9A3, 0x1F9A4, prEmojiPresentation}, // E13.0 [2] (🦣..🦤) mammoth..dodo + {0x1F9A5, 0x1F9AA, prEmojiPresentation}, // E12.0 [6] (🦥..🦪) sloth..oyster + {0x1F9AB, 0x1F9AD, prEmojiPresentation}, // E13.0 [3] (🦫..🦭) beaver..seal + {0x1F9AE, 0x1F9AF, prEmojiPresentation}, // E12.0 [2] (🦮..🦯) guide dog..white cane + {0x1F9B0, 0x1F9B9, prEmojiPresentation}, // E11.0 [10] (🦰..🦹) red hair..supervillain + {0x1F9BA, 0x1F9BF, 
prEmojiPresentation}, // E12.0 [6] (🦺..🦿) safety vest..mechanical leg + {0x1F9C0, 0x1F9C0, prEmojiPresentation}, // E1.0 [1] (🧀) cheese wedge + {0x1F9C1, 0x1F9C2, prEmojiPresentation}, // E11.0 [2] (🧁..🧂) cupcake..salt + {0x1F9C3, 0x1F9CA, prEmojiPresentation}, // E12.0 [8] (🧃..🧊) beverage box..ice + {0x1F9CB, 0x1F9CB, prEmojiPresentation}, // E13.0 [1] (🧋) bubble tea + {0x1F9CC, 0x1F9CC, prEmojiPresentation}, // E14.0 [1] (🧌) troll + {0x1F9CD, 0x1F9CF, prEmojiPresentation}, // E12.0 [3] (🧍..🧏) person standing..deaf person + {0x1F9D0, 0x1F9E6, prEmojiPresentation}, // E5.0 [23] (🧐..🧦) face with monocle..socks + {0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet + {0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts + {0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal + {0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope + {0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch + {0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute + {0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls + {0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo + {0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock + {0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa + {0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather + {0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs + {0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging + {0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown + {0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot + {0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar + {0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles + {0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands +} diff --git a/vendor/github.com/rivo/uniseg/gen_properties.go b/vendor/github.com/rivo/uniseg/gen_properties.go index 64512709e1..999d5efddf 100644 --- a/vendor/github.com/rivo/uniseg/gen_properties.go +++ b/vendor/github.com/rivo/uniseg/gen_properties.go @@ -3,19 +3,22 @@ // This program generates a property file in Go file from Unicode Character // Database auxiliary data files. The command line arguments are as follows: // -// 1. The name of the Unicode data file (just the filename, without extension). -// 2. The name of the locally generated Go file. -// 3. The name of the slice mapping code points to properties. -// 4. The name of the generator, for logging purposes. -// 5. (Optional) Flags, comma-separated. The following flags are available: -// - "emojis": include emoji properties (Extended Pictographic only). -// - "gencat": include general category properties. +// 1. The name of the Unicode data file (just the filename, without extension). +// Can be "-" (to skip) if the emoji flag is included. +// 2. The name of the locally generated Go file. +// 3. The name of the slice mapping code points to properties. +// 4. The name of the generator, for logging purposes. +// 5. (Optional) Flags, comma-separated. The following flags are available: +// - "emojis=": include the specified emoji properties (e.g. +// "Extended_Pictographic"). 
+// - "gencat": include general category properties. // -//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis -//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis +//go:generate go run gen_properties.go auxiliary/GraphemeBreakProperty graphemeproperties.go graphemeCodePoints graphemes emojis=Extended_Pictographic +//go:generate go run gen_properties.go auxiliary/WordBreakProperty wordproperties.go workBreakCodePoints words emojis=Extended_Pictographic //go:generate go run gen_properties.go auxiliary/SentenceBreakProperty sentenceproperties.go sentenceBreakCodePoints sentences //go:generate go run gen_properties.go LineBreak lineproperties.go lineBreakCodePoints lines gencat //go:generate go run gen_properties.go EastAsianWidth eastasianwidth.go eastAsianWidth eastasianwidth +//go:generate go run gen_properties.go - emojipresentation.go emojiPresentation emojipresentation emojis=Emoji_Presentation package main import ( @@ -38,8 +41,8 @@ import ( // We want to test against a specific version rather than the latest. When the // package is upgraded to a new version, change these to generate new tests. const ( - gbpURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt` - emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt` + propertyURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt` + emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt` ) // The regular expression for a line containing a code point range property. @@ -55,20 +58,25 @@ func main() { log.SetFlags(0) // Parse flags. - flags := make(map[string]struct{}) + flags := make(map[string]string) if len(os.Args) >= 6 { for _, flag := range strings.Split(os.Args[5], ",") { - flags[flag] = struct{}{} + flagFields := strings.Split(flag, "=") + if len(flagFields) == 1 { + flags[flagFields[0]] = "yes" + } else { + flags[flagFields[0]] = flagFields[1] + } } } // Parse the text file and generate Go source code from it. - var emojis string - if _, ok := flags["emojis"]; ok { - emojis = emojiURL - } _, includeGeneralCategory := flags["gencat"] - src, err := parse(fmt.Sprintf(gbpURL, os.Args[1]), emojis, includeGeneralCategory) + var mainURL string + if os.Args[1] != "-" { + mainURL = fmt.Sprintf(propertyURL, os.Args[1]) + } + src, err := parse(mainURL, flags["emojis"], includeGeneralCategory) if err != nil { log.Fatal(err) } @@ -88,49 +96,57 @@ func main() { // parse parses the Unicode Properties text files located at the given URLs and // returns their equivalent Go source code to be used in the uniseg package. If -// "emojiURL" is an empty string, no emoji code points will be included. If +// "emojiProperty" is not an empty string, emoji code points for that emoji +// property (e.g. "Extended_Pictographic") will be included. In those cases, you +// may pass an empty "propertyURL" to skip parsing the main properties file. If // "includeGeneralCategory" is true, the Unicode General Category property will // be extracted from the comments and included in the output. -func parse(gbpURL, emojiURL string, includeGeneralCategory bool) (string, error) { +func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (string, error) { + if propertyURL == "" && emojiProperty == "" { + return "", errors.New("no properties to parse") + } + // Temporary buffer to hold properties. var properties [][4]string // Open the first URL. 
- log.Printf("Parsing %s", gbpURL) - res, err := http.Get(gbpURL) - if err != nil { - return "", err - } - in1 := res.Body - defer in1.Close() - - // Parse it. - scanner := bufio.NewScanner(in1) - num := 0 - for scanner.Scan() { - num++ - line := strings.TrimSpace(scanner.Text()) - - // Skip comments and empty lines. - if strings.HasPrefix(line, "#") || line == "" { - continue + if propertyURL != "" { + log.Printf("Parsing %s", propertyURL) + res, err := http.Get(propertyURL) + if err != nil { + return "", err } + in1 := res.Body + defer in1.Close() - // Everything else must be a code point range, a property and a comment. - from, to, property, comment, err := parseProperty(line) - if err != nil { - return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err) + // Parse it. + scanner := bufio.NewScanner(in1) + num := 0 + for scanner.Scan() { + num++ + line := strings.TrimSpace(scanner.Text()) + + // Skip comments and empty lines. + if strings.HasPrefix(line, "#") || line == "" { + continue + } + + // Everything else must be a code point range, a property and a comment. + from, to, property, comment, err := parseProperty(line) + if err != nil { + return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err) + } + properties = append(properties, [4]string{from, to, property, comment}) + } + if err := scanner.Err(); err != nil { + return "", err } - properties = append(properties, [4]string{from, to, property, comment}) - } - if err := scanner.Err(); err != nil { - return "", err } // Open the second URL. - if emojiURL != "" { + if emojiProperty != "" { log.Printf("Parsing %s", emojiURL) - res, err = http.Get(emojiURL) + res, err := http.Get(emojiURL) if err != nil { return "", err } @@ -138,15 +154,15 @@ func parse(gbpURL, emojiURL string, includeGeneralCategory bool) (string, error) defer in2.Close() // Parse it. - scanner = bufio.NewScanner(in2) - num = 0 + scanner := bufio.NewScanner(in2) + num := 0 for scanner.Scan() { num++ line := scanner.Text() // Skip comments, empty lines, and everything not containing // "Extended_Pictographic". - if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, "Extended_Pictographic") { + if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, emojiProperty) { continue } @@ -189,7 +205,7 @@ func parse(gbpURL, emojiURL string, includeGeneralCategory bool) (string, error) // Code generated via go generate from gen_properties.go. DO NOT EDIT. // ` + os.Args[3] + ` are taken from -// ` + gbpURL + emojiComment + ` +// ` + propertyURL + emojiComment + ` // on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode // license agreement. var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{ diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go index 0c07c3c7a5..d5d4c09e51 100644 --- a/vendor/github.com/rivo/uniseg/grapheme.go +++ b/vendor/github.com/rivo/uniseg/grapheme.go @@ -4,15 +4,17 @@ import "unicode/utf8" // Graphemes implements an iterator over Unicode grapheme clusters, or // user-perceived characters. While iterating, it also provides information -// about word boundaries, sentence boundaries, and line breaks. +// about word boundaries, sentence boundaries, line breaks, and monospace +// character widths. // -// After constructing the class via NewGraphemes(str) for a given string "str", -// Next() is called for every grapheme cluster in a loop until it returns false. 
-// Inside the loop, information about the grapheme cluster as well as boundary -// information is available via the various methods (see examples below). +// After constructing the class via [NewGraphemes] for a given string "str", +// [Graphemes.Next] is called for every grapheme cluster in a loop until it +// returns false. Inside the loop, information about the grapheme cluster as +// well as boundary information and character width is available via the various +// methods (see examples below). // // Using this class to iterate over a string is convenient but it is much slower -// than using this package's Step() or StepString() functions or any of the +// than using this package's [Step] or [StepString] functions or any of the // other specialized functions starting with "First". type Graphemes struct { // The original string. @@ -28,18 +30,18 @@ type Graphemes struct { // string. offset int - // The current boundary information of the Step() parser. + // The current boundary information of the [Step] parser. boundaries int - // The current state of the Step() parser. + // The current state of the [Step] parser. state int } // NewGraphemes returns a new grapheme cluster iterator. -func NewGraphemes(s string) *Graphemes { +func NewGraphemes(str string) *Graphemes { return &Graphemes{ - original: s, - remaining: s, + original: str, + remaining: str, state: -1, } } @@ -60,8 +62,8 @@ func (g *Graphemes) Next() bool { } // Runes returns a slice of runes (code points) which corresponds to the current -// grapheme cluster. If the iterator is already past the end or Next() has not -// yet been called, nil is returned. +// grapheme cluster. If the iterator is already past the end or [Graphemes.Next] +// has not yet been called, nil is returned. func (g *Graphemes) Runes() []rune { if g.state < 0 { return nil @@ -70,15 +72,15 @@ func (g *Graphemes) Runes() []rune { } // Str returns a substring of the original string which corresponds to the -// current grapheme cluster. If the iterator is already past the end or Next() -// has not yet been called, an empty string is returned. +// current grapheme cluster. If the iterator is already past the end or +// [Graphemes.Next] has not yet been called, an empty string is returned. func (g *Graphemes) Str() string { return g.cluster } // Bytes returns a byte slice which corresponds to the current grapheme cluster. -// If the iterator is already past the end or Next() has not yet been called, -// nil is returned. +// If the iterator is already past the end or [Graphemes.Next] has not yet been +// called, nil is returned. func (g *Graphemes) Bytes() []byte { if g.state < 0 { return nil @@ -90,8 +92,8 @@ func (g *Graphemes) Bytes() []byte { // positions into the original string. The first returned value "from" indexes // the first byte and the second returned value "to" indexes the first byte that // is not included anymore, i.e. str[from:to] is the current grapheme cluster of -// the original string "str". If Next() has not yet been called, both values are -// 0. If the iterator is already past the end, both values are 1. +// the original string "str". If [Graphemes.Next] has not yet been called, both +// values are 0. If the iterator is already past the end, both values are 1. func (g *Graphemes) Positions() (int, int) { if g.state == -1 { return 0, 0 @@ -120,9 +122,9 @@ func (g *Graphemes) IsSentenceBoundary() bool { } // LineBreak returns whether the line can be broken after the current grapheme -// cluster. 
A value of LineDontBreak means the line may not be broken, a value -// of LineMustBreak means the line must be broken, and a value of LineCanBreak -// means the line may or may not be broken. +// cluster. A value of [LineDontBreak] means the line may not be broken, a value +// of [LineMustBreak] means the line must be broken, and a value of +// [LineCanBreak] means the line may or may not be broken. func (g *Graphemes) LineBreak() int { if g.state == -1 { return LineDontBreak @@ -133,8 +135,16 @@ func (g *Graphemes) LineBreak() int { return g.boundaries & MaskLine } +// Width returns the monospace width of the current grapheme cluster. +func (g *Graphemes) Width() int { + if g.state < 0 { + return 0 + } + return g.boundaries >> ShiftWidth +} + // Reset puts the iterator into its initial state such that the next call to -// Next() sets it to the first grapheme cluster again. +// [Graphemes.Next] sets it to the first grapheme cluster again. func (g *Graphemes) Reset() { g.state = -1 g.offset = 0 @@ -153,6 +163,29 @@ func GraphemeClusterCount(s string) (n int) { return } +// ReverseString reverses the given string while observing grapheme cluster +// boundaries. +func ReverseString(s string) string { + str := []byte(s) + reversed := make([]byte, len(str)) + state := -1 + index := len(str) + for len(str) > 0 { + var cluster []byte + cluster, str, _, state = FirstGraphemeCluster(str, state) + index -= len(cluster) + copy(reversed[index:], cluster) + if index <= len(str)/2 { + break + } + } + return string(reversed) +} + +// The number of bits the grapheme property must be shifted to make place for +// grapheme states. +const shiftGraphemePropState = 4 + // FirstGraphemeCluster returns the first grapheme cluster found in the given // byte slice according to the rules of Unicode Standard Annex #29, Grapheme // Cluster Boundaries. This function can be called continuously to extract all @@ -168,15 +201,15 @@ func GraphemeClusterCount(s string) (n int) { // "cluster" byte slice is the sub-slice of the input slice containing the // identified grapheme cluster. // +// The returned width is the width of the grapheme cluster for most monospace +// fonts where a value of 1 represents one character cell. +// // Given an empty byte slice "b", the function returns nil values. // // While slightly less convenient than using the Graphemes class, this function // has much better performance and makes no allocations. It lends itself well to // large byte slices. -// -// The "reserved" return value is a placeholder for future functionality and may -// be ignored for the time being. -func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, reserved, newState int) { +func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) { // An empty byte slice returns nothing. if len(b) == 0 { return @@ -185,34 +218,60 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, reserved, // Extract the first rune. r, length := utf8.DecodeRune(b) if len(b) <= length { // If we're already past the end, there is nothing else to parse. - return b, nil, 0, grAny + var prop int + if state < 0 { + prop = property(graphemeCodePoints, r) + } else { + prop = state >> shiftGraphemePropState + } + return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState) } // If we don't know the state, determine it now. 
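
An illustrative aside, not part of the upstream patch: a minimal sketch of the Graphemes iterator described above, now that each cluster also reports its monospace width via the new Width method:

package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	// Iterate user-perceived characters; Width reports each cluster's
	// monospace width as described in the Graphemes docs above.
	g := uniseg.NewGraphemes("Käse🧀!")
	for g.Next() {
		fmt.Printf("%q width=%d\n", g.Str(), g.Width())
	}
}
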
+ var firstProp int if state < 0 { - state, _ = transitionGraphemeState(state, r) + state, firstProp, _ = transitionGraphemeState(state, r) + } else { + firstProp = state >> shiftGraphemePropState } + width += runeWidth(r, firstProp) // Transition until we find a boundary. - var boundary bool for { + var ( + prop int + boundary bool + ) + r, l := utf8.DecodeRune(b[length:]) - state, boundary = transitionGraphemeState(state, r) + state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r) if boundary { - return b[:length], b[length:], 0, state + return b[:length], b[length:], width, state | (prop << shiftGraphemePropState) + } + + if r == vs16 { + width = 2 + } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL { + width += runeWidth(r, prop) + } else if firstProp == prExtendedPictographic { + if r == vs15 { + width = 1 + } else { + width = 2 + } } length += l if len(b) <= length { - return b, nil, 0, grAny + return b, nil, width, grAny | (prop << shiftGraphemePropState) } } } -// FirstGraphemeClusterInString is like FirstGraphemeCluster() but its input and +// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and // outputs are strings. -func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, reserved, newState int) { +func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) { // An empty string returns nothing. if len(str) == 0 { return @@ -221,27 +280,53 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, // Extract the first rune. r, length := utf8.DecodeRuneInString(str) if len(str) <= length { // If we're already past the end, there is nothing else to parse. - return str, "", 0, grAny + var prop int + if state < 0 { + prop = property(graphemeCodePoints, r) + } else { + prop = state >> shiftGraphemePropState + } + return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState) } // If we don't know the state, determine it now. + var firstProp int if state < 0 { - state, _ = transitionGraphemeState(state, r) + state, firstProp, _ = transitionGraphemeState(state, r) + } else { + firstProp = state >> shiftGraphemePropState } + width += runeWidth(r, firstProp) // Transition until we find a boundary. 
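
Another illustrative aside, not part of the upstream patch: the allocation-free FirstGraphemeClusterInString shown above threads its opaque state through successive calls, and its fourth return value changed from a reserved placeholder to the cluster width. A usage sketch under that signature:

package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	str := "🇩🇪 Flagge"
	state := -1 // -1 means "state unknown"; the parser determines it on the first call.
	for len(str) > 0 {
		var cluster string
		var width int
		cluster, str, width, state = uniseg.FirstGraphemeClusterInString(str, state)
		fmt.Printf("%q width=%d\n", cluster, width)
	}
}
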
- var boundary bool for { + var ( + prop int + boundary bool + ) + r, l := utf8.DecodeRuneInString(str[length:]) - state, boundary = transitionGraphemeState(state, r) + state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r) if boundary { - return str[:length], str[length:], 0, state + return str[:length], str[length:], width, state | (prop << shiftGraphemePropState) + } + + if r == vs16 { + width = 2 + } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL { + width += runeWidth(r, prop) + } else if firstProp == prExtendedPictographic { + if r == vs15 { + width = 1 + } else { + width = 2 + } } length += l if len(str) <= length { - return str, "", 0, grAny + return str, "", width, grAny | (prop << shiftGraphemePropState) } } } diff --git a/vendor/github.com/rivo/uniseg/graphemeproperties.go b/vendor/github.com/rivo/uniseg/graphemeproperties.go index a0c001689c..a87d140bf2 100644 --- a/vendor/github.com/rivo/uniseg/graphemeproperties.go +++ b/vendor/github.com/rivo/uniseg/graphemeproperties.go @@ -7,7 +7,7 @@ package uniseg // and // https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on July 25, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // license agreement. var graphemeCodePoints = [][3]int{ {0x0000, 0x0009, prControl}, // Cc [10] .. diff --git a/vendor/github.com/rivo/uniseg/graphemerules.go b/vendor/github.com/rivo/uniseg/graphemerules.go index c8e07111ce..907b30bd0a 100644 --- a/vendor/github.com/rivo/uniseg/graphemerules.go +++ b/vendor/github.com/rivo/uniseg/graphemerules.go @@ -27,14 +27,14 @@ const ( // // This map is queried as follows: // -// 1. Find specific state + specific property. Stop if found. -// 2. Find specific state + any property. -// 3. Find any state + specific property. -// 4. If only (2) or (3) (but not both) was found, stop. -// 5. If both (2) and (3) were found, use state from (3) and breaking instruction -// from the transition with the lower rule number, prefer (3) if rule numbers -// are equal. Stop. -// 6. Assume grAny and grBoundary. +// 1. Find specific state + specific property. Stop if found. +// 2. Find specific state + any property. +// 3. Find any state + specific property. +// 4. If only (2) or (3) (but not both) was found, stop. +// 5. If both (2) and (3) were found, use state from (3) and breaking instruction +// from the transition with the lower rule number, prefer (3) if rule numbers +// are equal. Stop. +// 6. Assume grAny and grBoundary. // // Unicode version 14.0.0. var grTransitions = map[[2]int][3]int{ @@ -92,22 +92,23 @@ var grTransitions = map[[2]int][3]int{ } // transitionGraphemeState determines the new state of the grapheme cluster -// parser given the current state and the next code point. It also returns -// whether a cluster boundary was detected. -func transitionGraphemeState(state int, r rune) (newState int, boundary bool) { +// parser given the current state and the next code point. It also returns the +// code point's grapheme property (the value mapped by the [graphemeCodePoints] +// table) and whether a cluster boundary was detected. +func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) { // Determine the property of the next character. - nextProperty := property(graphemeCodePoints, r) + prop = property(graphemeCodePoints, r) // Find the applicable transition. 
- transition, ok := grTransitions[[2]int{state, nextProperty}] + transition, ok := grTransitions[[2]int{state, prop}] if ok { // We have a specific transition. We'll use it. - return transition[0], transition[1] == grBoundary + return transition[0], prop, transition[1] == grBoundary } // No specific transition found. Try the less specific ones. transAnyProp, okAnyProp := grTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}] + transAnyState, okAnyState := grTransitions[[2]int{grAny, prop}] if okAnyProp && okAnyState { // Both apply. We'll use a mix (see comments for grTransitions). newState = transAnyState[0] @@ -120,7 +121,7 @@ func transitionGraphemeState(state int, r rune) (newState int, boundary bool) { if okAnyProp { // We only have a specific state. - return transAnyProp[0], transAnyProp[1] == grBoundary + return transAnyProp[0], prop, transAnyProp[1] == grBoundary // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be @@ -129,9 +130,9 @@ func transitionGraphemeState(state int, r rune) (newState int, boundary bool) { if okAnyState { // We only have a specific property. - return transAnyState[0], transAnyState[1] == grBoundary + return transAnyState[0], prop, transAnyState[1] == grBoundary } // No known transition. GB999: Any ÷ Any. - return grAny, true + return grAny, prop, true } diff --git a/vendor/github.com/rivo/uniseg/line.go b/vendor/github.com/rivo/uniseg/line.go index 51a9e144fa..c0398cacf8 100644 --- a/vendor/github.com/rivo/uniseg/line.go +++ b/vendor/github.com/rivo/uniseg/line.go @@ -13,7 +13,7 @@ import "unicode/utf8" // // The returned "segment" may not be broken into smaller parts, unless no other // breaking opportunities present themselves, in which case you may break by -// grapheme clusters (using the FirstGraphemeCluster() function to determine the +// grapheme clusters (using the [FirstGraphemeCluster] function to determine the // grapheme clusters). // // The "mustBreak" flag indicates whether you MUST break the line after the @@ -37,11 +37,12 @@ import "unicode/utf8" // // Note that in accordance with UAX #14 LB3, the final segment will end with // "mustBreak" set to true. You can choose to ignore this by checking if the -// length of the "rest" slice is 0. +// length of the "rest" slice is 0 and calling [HasTrailingLineBreak] or +// [HasTrailingLineBreakInString] on the last rune. // // Note also that this algorithm may break within grapheme clusters. This is // addressed in Section 8.2 Example 6 of UAX #14. To avoid this, you can use -// the Step() function instead. +// the [Step] function instead. func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool, newState int) { // An empty byte slice returns nothing. if len(b) == 0 { @@ -111,3 +112,20 @@ func FirstLineSegmentInString(str string, state int) (segment, rest string, must } } } + +// HasTrailingLineBreak returns true if the last rune in the given byte slice is +// one of the hard line break code points defined in LB4 and LB5 of [UAX #14]. 
+// +// [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm +func HasTrailingLineBreak(b []byte) bool { + r, _ := utf8.DecodeLastRune(b) + property, _ := propertyWithGenCat(lineBreakCodePoints, r) + return property == lbBK || property == lbCR || property == lbLF || property == lbNL +} + +// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string. +func HasTrailingLineBreakInString(str string) bool { + r, _ := utf8.DecodeLastRuneInString(str) + property, _ := propertyWithGenCat(lineBreakCodePoints, r) + return property == lbBK || property == lbCR || property == lbLF || property == lbNL +} diff --git a/vendor/github.com/rivo/uniseg/lineproperties.go b/vendor/github.com/rivo/uniseg/lineproperties.go index 98948adf87..32169306e8 100644 --- a/vendor/github.com/rivo/uniseg/lineproperties.go +++ b/vendor/github.com/rivo/uniseg/lineproperties.go @@ -4,7 +4,10 @@ package uniseg // lineBreakCodePoints are taken from // https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt -// on July 25, 2022. See https://www.unicode.org/license.html for the Unicode +// and +// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// ("Extended_Pictographic" only) +// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // license agreement. var lineBreakCodePoints = [][4]int{ {0x0000, 0x0008, prCM, gcCc}, // [9] .. diff --git a/vendor/github.com/rivo/uniseg/properties.go b/vendor/github.com/rivo/uniseg/properties.go index 0bb3db62e6..bc3c7bcf34 100644 --- a/vendor/github.com/rivo/uniseg/properties.go +++ b/vendor/github.com/rivo/uniseg/properties.go @@ -3,9 +3,9 @@ package uniseg // The Unicode properties as used in the various parsers. Only the ones needed // in the context of this package are included. const ( - prXX = 0 // Same as prAny. - prAny = iota // prAny must be 0. - prPrepend + prXX = 0 // Same as prAny. + prAny = iota // prAny must be 0. + prPrepend // Grapheme properties must come first, to reduce the number of bits stored in the state vector. prCR prLF prControl @@ -86,6 +86,7 @@ const ( prW prH prF + prEmojiPresentation ) // Unicode General Categories. Only the ones needed in the context of this @@ -124,6 +125,12 @@ const ( gcCo ) +// Special code points. +const ( + vs15 = 0xfe0e // Variation Selector-15 (text presentation) + vs16 = 0xfe0f // Variation Selector-16 (emoji presentation) +) + // propertySearch performs a binary search on a property slice and returns the // entry whose range (start = first array element, end = second array element) // includes r, or an array of 0's if no such entry was found. diff --git a/vendor/github.com/rivo/uniseg/sentence.go b/vendor/github.com/rivo/uniseg/sentence.go index 1c1deaff2c..b7fc70996e 100644 --- a/vendor/github.com/rivo/uniseg/sentence.go +++ b/vendor/github.com/rivo/uniseg/sentence.go @@ -51,7 +51,7 @@ func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) { } } -// FirstSentenceInString is like FirstSentence() but its input and outputs are +// FirstSentenceInString is like [FirstSentence] but its input and outputs are // strings. func FirstSentenceInString(str string, state int) (sentence, rest string, newState int) { // An empty byte slice returns nothing. 
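
An illustrative aside, not part of the upstream patch: the two helpers just added make the UAX #14 LB3 caveat easy to check. A usage sketch:

package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	// True only when the last rune is a hard line break per LB4/LB5 (BK, CR, LF, NL).
	fmt.Println(uniseg.HasTrailingLineBreakInString("no break"))      // false
	fmt.Println(uniseg.HasTrailingLineBreakInString("hard break\n"))  // true
	fmt.Println(uniseg.HasTrailingLineBreak([]byte("also hard\r\n"))) // true
}
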
diff --git a/vendor/github.com/rivo/uniseg/sentenceproperties.go b/vendor/github.com/rivo/uniseg/sentenceproperties.go
index e6fe7254c0..ba0cf2de11 100644
--- a/vendor/github.com/rivo/uniseg/sentenceproperties.go
+++ b/vendor/github.com/rivo/uniseg/sentenceproperties.go
@@ -4,7 +4,10 @@ package uniseg
 // sentenceBreakCodePoints are taken from
 // https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
-// on July 25, 2022. See https://www.unicode.org/license.html for the Unicode
+// and
+// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+// ("Extended_Pictographic" only)
+// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
 // license agreement.
 var sentenceBreakCodePoints = [][3]int{
 	{0x0009, 0x0009, prSp}, // Cc
diff --git a/vendor/github.com/rivo/uniseg/step.go b/vendor/github.com/rivo/uniseg/step.go
index 8b10489e4a..55e7f1219e 100644
--- a/vendor/github.com/rivo/uniseg/step.go
+++ b/vendor/github.com/rivo/uniseg/step.go
@@ -2,31 +2,37 @@ package uniseg
 
 import "unicode/utf8"
 
-// The bit masks used to extract boundary information returned by the Step()
-// function.
+// The bit masks used to extract boundary information returned by [Step].
 const (
 	MaskLine     = 3
 	MaskWord     = 4
 	MaskSentence = 8
 )
 
-// The bit positions by which boundary flags are shifted by the Step() function.
-// This must correspond to the Mask constants.
+// The number of bits to shift the boundary information returned by [Step] to
+// obtain the monospace width of the grapheme cluster.
+const ShiftWidth = 4
+
+// The bit positions by which boundary flags are shifted by the [Step] function.
+// These must correspond to the Mask constants.
 const (
 	shiftWord     = 2
 	shiftSentence = 3
+	// The width shift is ShiftWidth, defined above. No mask as these are always the remaining bits.
 )
 
-// The bit positions by which states are shifted by the Step() function. These
+// The bit positions by which states are shifted by the [Step] function. These
 // values must ensure state values defined for each of the boundary algorithms
-// don't overlap (and that they all still fit in a single int).
+// don't overlap (and that they all still fit in a single int). These must
+// correspond to the Mask constants.
 const (
 	shiftWordState     = 4
 	shiftSentenceState = 9
 	shiftLineState     = 13
+	shiftPropState     = 21 // No mask as these are always the remaining bits.
 )
 
-// The bit mask used to extract the state returned by the Step() function, after
+// The bit mask used to extract the state returned by the [Step] function, after
 // shifting. These values must correspond to the shift constants.
 const (
 	maskGraphemeState = 0xf
@@ -37,10 +43,11 @@
 // Step returns the first grapheme cluster (user-perceived character) found in
 // the given byte slice. It also returns information about the boundary between
-// that grapheme cluster and the one following it. There are three types of
-// boundary information: word boundaries, sentence boundaries, and line breaks.
-// This function is therefore a combination of FirstGraphemeCluster(),
-// FirstWord(), FirstSentence(), and FirstLineSegment().
+// that grapheme cluster and the one following it as well as the monospace width
+// of the grapheme cluster. There are three types of boundary information: word
+// boundaries, sentence boundaries, and line breaks. This function is therefore
+// a combination of [FirstGraphemeCluster], [FirstWord], [FirstSentence], and
+// [FirstLineSegment].
// // The "boundaries" return value can be evaluated as follows: // @@ -54,6 +61,8 @@ const ( // boundary. // - boundaries&MaskLine == LineCanBreak: You may or may not break the line at // the boundary. +// - boundaries >> ShiftWidth: The width of the grapheme cluster for most +// monospace fonts where a value of 1 represents one character cell. // // This function can be called continuously to extract all grapheme clusters // from a byte slice, as illustrated in the examples below. @@ -76,7 +85,8 @@ const ( // // Note that in accordance with UAX #14 LB3, the final segment will end with // a mandatory line break (boundaries&MaskLine == LineMustBreak). You can choose -// to ignore this by checking if the length of the "rest" slice is 0. +// to ignore this by checking if the length of the "rest" slice is 0 and calling +// [HasTrailingLineBreak] or [HasTrailingLineBreakInString] on the last rune. func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState int) { // An empty byte slice returns nothing. if len(b) == 0 { @@ -86,14 +96,20 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i // Extract the first rune. r, length := utf8.DecodeRune(b) if len(b) <= length { // If we're already past the end, there is nothing else to parse. - return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) + var prop int + if state < 0 { + prop = property(graphemeCodePoints, r) + } else { + prop = state >> shiftPropState + } + return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState) } // If we don't know the state, determine it now. - var graphemeState, wordState, sentenceState, lineState int + var graphemeState, wordState, sentenceState, lineState, firstProp int remainder := b[length:] if state < 0 { - graphemeState, _ = transitionGraphemeState(state, r) + graphemeState, firstProp, _ = transitionGraphemeState(state, r) wordState, _ = transitionWordBreakState(state, r, remainder, "") sentenceState, _ = transitionSentenceBreakState(state, r, remainder, "") lineState, _ = transitionLineBreakState(state, r, remainder, "") @@ -102,41 +118,56 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i wordState = (state >> shiftWordState) & maskWordState sentenceState = (state >> shiftSentenceState) & maskSentenceState lineState = (state >> shiftLineState) & maskLineState + firstProp = state >> shiftPropState } // Transition until we find a grapheme cluster boundary. 
- var ( - graphemeBoundary, wordBoundary, sentenceBoundary bool - lineBreak int - ) + width := runeWidth(r, firstProp) for { + var ( + graphemeBoundary, wordBoundary, sentenceBoundary bool + lineBreak, prop int + ) + r, l := utf8.DecodeRune(remainder) remainder = b[length+l:] - graphemeState, graphemeBoundary = transitionGraphemeState(graphemeState, r) + graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r) wordState, wordBoundary = transitionWordBreakState(wordState, r, remainder, "") sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, remainder, "") lineState, lineBreak = transitionLineBreakState(lineState, r, remainder, "") if graphemeBoundary { - boundary := lineBreak + boundary := lineBreak | (width << ShiftWidth) if wordBoundary { boundary |= 1 << shiftWord } if sentenceBoundary { boundary |= 1 << shiftSentence } - return b[:length], b[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) + return b[:length], b[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState) + } + + if r == vs16 { + width = 2 + } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL { + width += runeWidth(r, prop) + } else if firstProp == prExtendedPictographic { + if r == vs15 { + width = 1 + } else { + width = 2 + } } length += l if len(b) <= length { - return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) + return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState) } } } -// StepString is like Step() but its input and outputs are strings. +// StepString is like [Step] but its input and outputs are strings. func StepString(str string, state int) (cluster, rest string, boundaries int, newState int) { // An empty byte slice returns nothing. if len(str) == 0 { @@ -146,14 +177,15 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne // Extract the first rune. r, length := utf8.DecodeRuneInString(str) if len(str) <= length { // If we're already past the end, there is nothing else to parse. - return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) + prop := property(graphemeCodePoints, r) + return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) } // If we don't know the state, determine it now. 
- var graphemeState, wordState, sentenceState, lineState int + var graphemeState, wordState, sentenceState, lineState, firstProp int remainder := str[length:] if state < 0 { - graphemeState, _ = transitionGraphemeState(state, r) + graphemeState, firstProp, _ = transitionGraphemeState(state, r) wordState, _ = transitionWordBreakState(state, r, nil, remainder) sentenceState, _ = transitionSentenceBreakState(state, r, nil, remainder) lineState, _ = transitionLineBreakState(state, r, nil, remainder) @@ -162,36 +194,51 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne wordState = (state >> shiftWordState) & maskWordState sentenceState = (state >> shiftSentenceState) & maskSentenceState lineState = (state >> shiftLineState) & maskLineState + firstProp = state >> shiftPropState } // Transition until we find a grapheme cluster boundary. - var ( - graphemeBoundary, wordBoundary, sentenceBoundary bool - lineBreak int - ) + width := runeWidth(r, firstProp) for { + var ( + graphemeBoundary, wordBoundary, sentenceBoundary bool + lineBreak, prop int + ) + r, l := utf8.DecodeRuneInString(remainder) remainder = str[length+l:] - graphemeState, graphemeBoundary = transitionGraphemeState(graphemeState, r) + graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r) wordState, wordBoundary = transitionWordBreakState(wordState, r, nil, remainder) sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, nil, remainder) lineState, lineBreak = transitionLineBreakState(lineState, r, nil, remainder) if graphemeBoundary { - boundary := lineBreak + boundary := lineBreak | (width << ShiftWidth) if wordBoundary { boundary |= 1 << shiftWord } if sentenceBoundary { boundary |= 1 << shiftSentence } - return str[:length], str[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) + return str[:length], str[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState) + } + + if r == vs16 { + width = 2 + } else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL { + width += runeWidth(r, prop) + } else if firstProp == prExtendedPictographic { + if r == vs15 { + width = 1 + } else { + width = 2 + } } length += l if len(str) <= length { - return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) + return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState) } } } diff --git a/vendor/github.com/rivo/uniseg/width.go b/vendor/github.com/rivo/uniseg/width.go new file mode 100644 index 0000000000..12a57cc2e3 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/width.go @@ -0,0 +1,54 @@ +package uniseg + +// runeWidth returns the monospace width for the given rune. The provided +// grapheme property is a value mapped by the [graphemeCodePoints] table. 
+// +// Every rune has a width of 1, except for runes with the following properties +// (evaluated in this order): +// +// - Control, CR, LF, Extend, ZWJ: Width of 0 +// - \u2e3a, TWO-EM DASH: Width of 3 +// - \u2e3b, THREE-EM DASH: Width of 4 +// - East-Asian width Fullwidth and Wide: Width of 2 (Ambiguous and Neutral +// have a width of 1) +// - Regional Indicator: Width of 2 +// - Extended Pictographic: Width of 2, unless Emoji Presentation is "No". +func runeWidth(r rune, graphemeProperty int) int { + switch graphemeProperty { + case prControl, prCR, prLF, prExtend, prZWJ: + return 0 + case prRegionalIndicator: + return 2 + case prExtendedPictographic: + if property(emojiPresentation, r) == prEmojiPresentation { + return 2 + } + return 1 + } + + switch r { + case 0x2e3a: + return 3 + case 0x2e3b: + return 4 + } + + switch property(eastAsianWidth, r) { + case prW, prF: + return 2 + } + + return 1 +} + +// StringWidth returns the monospace width for the given string, that is, the +// number of same-size cells to be occupied by the string. +func StringWidth(s string) (width int) { + state := -1 + for len(s) > 0 { + var w int + _, s, w, state = FirstGraphemeClusterInString(s, state) + width += w + } + return +} diff --git a/vendor/github.com/rivo/uniseg/word.go b/vendor/github.com/rivo/uniseg/word.go index 41781eed29..785af1e87b 100644 --- a/vendor/github.com/rivo/uniseg/word.go +++ b/vendor/github.com/rivo/uniseg/word.go @@ -51,7 +51,7 @@ func FirstWord(b []byte, state int) (word, rest []byte, newState int) { } } -// FirstWordInString is like FirstWord() but its input and outputs are strings. +// FirstWordInString is like [FirstWord] but its input and outputs are strings. func FirstWordInString(str string, state int) (word, rest string, newState int) { // An empty byte slice returns nothing. if len(str) == 0 { diff --git a/vendor/github.com/rivo/uniseg/wordproperties.go b/vendor/github.com/rivo/uniseg/wordproperties.go index 48697a4331..805cc536cb 100644 --- a/vendor/github.com/rivo/uniseg/wordproperties.go +++ b/vendor/github.com/rivo/uniseg/wordproperties.go @@ -7,7 +7,7 @@ package uniseg // and // https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on July 25, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // license agreement. 
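
An illustrative aside, not part of the upstream patch: the width rules of the new width.go, exercised through the exported StringWidth helper. The expected cell counts follow from the rules listed above:

package main

import (
	"fmt"

	"github.com/rivo/uniseg"
)

func main() {
	fmt.Println(uniseg.StringWidth("Hello")) // 5: one cell per ASCII rune
	fmt.Println(uniseg.StringWidth("世界"))   // 4: East-Asian Wide runes take two cells
	fmt.Println(uniseg.StringWidth("🐈"))     // 2: Extended Pictographic with default emoji presentation
	fmt.Println(uniseg.StringWidth("a\tb"))  // 2: control characters have width 0
}
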
var workBreakCodePoints = [][3]int{ {0x000A, 0x000A, prLF}, // Cc @@ -624,8 +624,8 @@ var workBreakCodePoints = [][3]int{ {0x212A, 0x212D, prALetter}, // L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C {0x212F, 0x2134, prALetter}, // L& [6] SCRIPT SMALL E..SCRIPT SMALL O {0x2135, 0x2138, prALetter}, // Lo [4] ALEF SYMBOL..DALET SYMBOL - {0x2139, 0x2139, prALetter}, // L& INFORMATION SOURCE {0x2139, 0x2139, prExtendedPictographic}, // E0.6 [1] (ℹ️) information + {0x2139, 0x2139, prALetter}, // L& INFORMATION SOURCE {0x213C, 0x213F, prALetter}, // L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI {0x2145, 0x2149, prALetter}, // L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J {0x214E, 0x214E, prALetter}, // L& TURNED SMALL F diff --git a/vendor/github.com/valyala/fastjson/LICENSE b/vendor/github.com/valyala/fastjson/LICENSE new file mode 100644 index 0000000000..6f665f3e29 --- /dev/null +++ b/vendor/github.com/valyala/fastjson/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2018 Aliaksandr Valialkin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/valyala/fastjson/fastfloat/parse.go b/vendor/github.com/valyala/fastjson/fastfloat/parse.go new file mode 100644 index 0000000000..5d4a7c7a73 --- /dev/null +++ b/vendor/github.com/valyala/fastjson/fastfloat/parse.go @@ -0,0 +1,499 @@ +package fastfloat + +import ( + "fmt" + "math" + "strconv" + "strings" +) + +// ParseUint64BestEffort parses uint64 number s. +// +// It is equivalent to strconv.ParseUint(s, 10, 64), but is faster. +// +// 0 is returned if the number cannot be parsed. +// See also ParseUint64, which returns parse error if the number cannot be parsed. +func ParseUint64BestEffort(s string) uint64 { + if len(s) == 0 { + return 0 + } + i := uint(0) + d := uint64(0) + j := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + d = d*10 + uint64(s[i]-'0') + i++ + if i > 18 { + // The integer part may be out of range for uint64. + // Fall back to slow parsing. + dd, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return 0 + } + return dd + } + continue + } + break + } + if i <= j { + return 0 + } + if i < uint(len(s)) { + // Unparsed tail left. + return 0 + } + return d +} + +// ParseUint64 parses uint64 from s. +// +// It is equivalent to strconv.ParseUint(s, 10, 64), but is faster. +// +// See also ParseUint64BestEffort. 
+func ParseUint64(s string) (uint64, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("cannot parse uint64 from empty string")
+	}
+	i := uint(0)
+	d := uint64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + uint64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for uint64.
+				// Fall back to slow parsing.
+				dd, err := strconv.ParseUint(s, 10, 64)
+				if err != nil {
+					return 0, err
+				}
+				return dd, nil
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		return 0, fmt.Errorf("cannot parse uint64 from %q", s)
+	}
+	if i < uint(len(s)) {
+		// Unparsed tail left.
+		return 0, fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:])
+	}
+	return d, nil
+}
+
+// ParseInt64BestEffort parses int64 number s.
+//
+// It is equivalent to strconv.ParseInt(s, 10, 64), but is faster.
+//
+// 0 is returned if the number cannot be parsed.
+// See also ParseInt64, which returns parse error if the number cannot be parsed.
+func ParseInt64BestEffort(s string) int64 {
+	if len(s) == 0 {
+		return 0
+	}
+	i := uint(0)
+	minus := s[0] == '-'
+	if minus {
+		i++
+		if i >= uint(len(s)) {
+			return 0
+		}
+	}
+
+	d := int64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + int64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for int64.
+				// Fall back to slow parsing.
+				dd, err := strconv.ParseInt(s, 10, 64)
+				if err != nil {
+					return 0
+				}
+				return dd
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		return 0
+	}
+	if i < uint(len(s)) {
+		// Unparsed tail left.
+		return 0
+	}
+	if minus {
+		d = -d
+	}
+	return d
+}
+
+// ParseInt64 parses int64 number s.
+//
+// It is equivalent to strconv.ParseInt(s, 10, 64), but is faster.
+//
+// See also ParseInt64BestEffort.
+func ParseInt64(s string) (int64, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("cannot parse int64 from empty string")
+	}
+	i := uint(0)
+	minus := s[0] == '-'
+	if minus {
+		i++
+		if i >= uint(len(s)) {
+			return 0, fmt.Errorf("cannot parse int64 from %q", s)
+		}
+	}
+
+	d := int64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + int64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for int64.
+				// Fall back to slow parsing.
+				dd, err := strconv.ParseInt(s, 10, 64)
+				if err != nil {
+					return 0, err
+				}
+				return dd, nil
+			}
+			continue
+		}
+		break
+	}
+	if i <= j {
+		return 0, fmt.Errorf("cannot parse int64 from %q", s)
+	}
+	if i < uint(len(s)) {
+		// Unparsed tail left.
+		return 0, fmt.Errorf("unparsed tail left after parsing int64 from %q: %q", s, s[i:])
+	}
+	if minus {
+		d = -d
+	}
+	return d, nil
+}
+
+// Exact powers of 10.
+//
+// This works faster than math.Pow10, since it avoids additional multiplication.
+var float64pow10 = [...]float64{
+	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
+}
+
+// ParseBestEffort parses floating-point number s.
+//
+// It is equivalent to strconv.ParseFloat(s, 64), but is faster.
+//
+// 0 is returned if the number cannot be parsed.
+// See also Parse, which returns parse error if the number cannot be parsed.
+func ParseBestEffort(s string) float64 {
+	if len(s) == 0 {
+		return 0
+	}
+	i := uint(0)
+	minus := s[0] == '-'
+	if minus {
+		i++
+		if i >= uint(len(s)) {
+			return 0
+		}
+	}
+
+	d := uint64(0)
+	j := i
+	for i < uint(len(s)) {
+		if s[i] >= '0' && s[i] <= '9' {
+			d = d*10 + uint64(s[i]-'0')
+			i++
+			if i > 18 {
+				// The integer part may be out of range for uint64.
+ // Fall back to slow parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0 + } + return f + } + continue + } + break + } + if i <= j { + s = s[i:] + if strings.HasPrefix(s, "+") { + s = s[1:] + } + // "infinity" is needed for OpenMetrics support. + // See https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md + if strings.EqualFold(s, "inf") || strings.EqualFold(s, "infinity") { + if minus { + return -inf + } + return inf + } + if strings.EqualFold(s, "nan") { + return nan + } + return 0 + } + f := float64(d) + if i >= uint(len(s)) { + // Fast path - just integer. + if minus { + f = -f + } + return f + } + + if s[i] == '.' { + // Parse fractional part. + i++ + if i >= uint(len(s)) { + return 0 + } + k := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + d = d*10 + uint64(s[i]-'0') + i++ + if i-j >= uint(len(float64pow10)) { + // The mantissa is out of range. Fall back to standard parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0 + } + return f + } + continue + } + break + } + if i < k { + return 0 + } + // Convert the entire mantissa to a float at once to avoid rounding errors. + f = float64(d) / float64pow10[i-k] + if i >= uint(len(s)) { + // Fast path - parsed fractional number. + if minus { + f = -f + } + return f + } + } + if s[i] == 'e' || s[i] == 'E' { + // Parse exponent part. + i++ + if i >= uint(len(s)) { + return 0 + } + expMinus := false + if s[i] == '+' || s[i] == '-' { + expMinus = s[i] == '-' + i++ + if i >= uint(len(s)) { + return 0 + } + } + exp := int16(0) + j := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + exp = exp*10 + int16(s[i]-'0') + i++ + if exp > 300 { + // The exponent may be too big for float64. + // Fall back to standard parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0 + } + return f + } + continue + } + break + } + if i <= j { + return 0 + } + if expMinus { + exp = -exp + } + f *= math.Pow10(int(exp)) + if i >= uint(len(s)) { + if minus { + f = -f + } + return f + } + } + return 0 +} + +// Parse parses floating-point number s. +// +// It is equivalent to strconv.ParseFloat(s, 64), but is faster. +// +// See also ParseBestEffort. +func Parse(s string) (float64, error) { + if len(s) == 0 { + return 0, fmt.Errorf("cannot parse float64 from empty string") + } + i := uint(0) + minus := s[0] == '-' + if minus { + i++ + if i >= uint(len(s)) { + return 0, fmt.Errorf("cannot parse float64 from %q", s) + } + } + + d := uint64(0) + j := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + d = d*10 + uint64(s[i]-'0') + i++ + if i > 18 { + // The integer part may be out of range for uint64. + // Fall back to slow parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0, err + } + return f, nil + } + continue + } + break + } + if i <= j { + ss := s[i:] + if strings.HasPrefix(ss, "+") { + ss = ss[1:] + } + // "infinity" is needed for OpenMetrics support. + // See https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md + if strings.EqualFold(ss, "inf") || strings.EqualFold(ss, "infinity") { + if minus { + return -inf, nil + } + return inf, nil + } + if strings.EqualFold(ss, "nan") { + return nan, nil + } + return 0, fmt.Errorf("unparsed tail left after parsing float64 from %q: %q", s, ss) + } + f := float64(d) + if i >= uint(len(s)) { + // Fast path - just integer. 
+ if minus { + f = -f + } + return f, nil + } + + if s[i] == '.' { + // Parse fractional part. + i++ + if i >= uint(len(s)) { + return 0, fmt.Errorf("cannot parse fractional part in %q", s) + } + k := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + d = d*10 + uint64(s[i]-'0') + i++ + if i-j >= uint(len(float64pow10)) { + // The mantissa is out of range. Fall back to standard parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0, fmt.Errorf("cannot parse mantissa in %q: %s", s, err) + } + return f, nil + } + continue + } + break + } + if i < k { + return 0, fmt.Errorf("cannot find mantissa in %q", s) + } + // Convert the entire mantissa to a float at once to avoid rounding errors. + f = float64(d) / float64pow10[i-k] + if i >= uint(len(s)) { + // Fast path - parsed fractional number. + if minus { + f = -f + } + return f, nil + } + } + if s[i] == 'e' || s[i] == 'E' { + // Parse exponent part. + i++ + if i >= uint(len(s)) { + return 0, fmt.Errorf("cannot parse exponent in %q", s) + } + expMinus := false + if s[i] == '+' || s[i] == '-' { + expMinus = s[i] == '-' + i++ + if i >= uint(len(s)) { + return 0, fmt.Errorf("cannot parse exponent in %q", s) + } + } + exp := int16(0) + j := i + for i < uint(len(s)) { + if s[i] >= '0' && s[i] <= '9' { + exp = exp*10 + int16(s[i]-'0') + i++ + if exp > 300 { + // The exponent may be too big for float64. + // Fall back to standard parsing. + f, err := strconv.ParseFloat(s, 64) + if err != nil && !math.IsInf(f, 0) { + return 0, fmt.Errorf("cannot parse exponent in %q: %s", s, err) + } + return f, nil + } + continue + } + break + } + if i <= j { + return 0, fmt.Errorf("cannot parse exponent in %q", s) + } + if expMinus { + exp = -exp + } + f *= math.Pow10(int(exp)) + if i >= uint(len(s)) { + if minus { + f = -f + } + return f, nil + } + } + return 0, fmt.Errorf("cannot parse float64 from %q", s) +} + +var inf = math.Inf(1) +var nan = math.NaN() diff --git a/vendor/modules.txt b/vendor/modules.txt index 3daec7dae7..1b12712fb9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -37,7 +37,7 @@ github.com/DataDog/sketches-go/ddsketch/mapping github.com/DataDog/sketches-go/ddsketch/pb/sketchpb github.com/DataDog/sketches-go/ddsketch/stat github.com/DataDog/sketches-go/ddsketch/store -# github.com/GuanceCloud/cliutils v1.1.21 +# github.com/GuanceCloud/cliutils v1.1.22-0.20240930074036-255c78c086fd ## explicit; go 1.19 github.com/GuanceCloud/cliutils github.com/GuanceCloud/cliutils/dialtesting @@ -1154,8 +1154,8 @@ github.com/kballard/go-shellquote # github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 ## explicit github.com/kevinburke/ssh_config -# github.com/klauspost/compress v1.16.7 -## explicit; go 1.18 +# github.com/klauspost/compress v1.17.9 +## explicit; go 1.20 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse @@ -1198,7 +1198,7 @@ github.com/mattn/go-isatty # github.com/mattn/go-oci8 v0.1.1 ## explicit; go 1.13 github.com/mattn/go-oci8 -# github.com/mattn/go-runewidth v0.0.13 +# github.com/mattn/go-runewidth v0.0.14 ## explicit; go 1.9 github.com/mattn/go-runewidth # github.com/mattn/go-tty v0.0.3 @@ -1505,7 +1505,7 @@ github.com/remyoudompheng/bigfft # github.com/rivo/tview v0.0.0-20220129131435-1f7581b67bd1 ## explicit; go 1.12 github.com/rivo/tview -# github.com/rivo/uniseg v0.3.1 +# github.com/rivo/uniseg v0.4.3 ## explicit; go 1.18 github.com/rivo/uniseg # github.com/robfig/cron/v3 v3.0.1 @@ -1643,6 +1643,9 @@ 
github.com/ugorji/go/codec # github.com/valyala/bytebufferpool v1.0.0 ## explicit github.com/valyala/bytebufferpool +# github.com/valyala/fastjson v1.6.3 +## explicit; go 1.12 +github.com/valyala/fastjson/fastfloat # github.com/vishvananda/netlink v1.2.1-beta.2.0.20230807190133-6afddb37c1f0 ## explicit; go 1.12 github.com/vishvananda/netlink @@ -1945,7 +1948,7 @@ golang.org/x/tools/internal/pkgbits golang.org/x/tools/internal/tokeninternal golang.org/x/tools/internal/typeparams golang.org/x/tools/internal/typesinternal -# golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f +# golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 ## explicit; go 1.17 golang.org/x/xerrors golang.org/x/xerrors/internal
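
For orientation, a minimal usage sketch of the fastfloat API vendored above (illustrative only, not part of the diff). It shows the trade-off the package makes: Parse, ParseInt64, and ParseUint64 report errors, while the BestEffort variants return 0 on malformed input, and the OpenMetrics spellings of infinity and NaN are accepted case-insensitively, as the doc comments above state.

package main

import (
	"fmt"

	"github.com/valyala/fastjson/fastfloat"
)

func main() {
	// Plain integers and decimals take the fast path with no strconv call.
	fmt.Println(fastfloat.ParseBestEffort("1.25")) // 1.25
	fmt.Println(fastfloat.ParseBestEffort("-4e2")) // -400

	// OpenMetrics special values, case-insensitive, with optional sign.
	fmt.Println(fastfloat.ParseBestEffort("+Inf")) // +Inf
	fmt.Println(fastfloat.ParseBestEffort("nan"))  // NaN

	// Malformed input: the best-effort variant returns 0,
	// while Parse reports an error.
	fmt.Println(fastfloat.ParseBestEffort("1.2.3")) // 0
	if _, err := fastfloat.Parse("1.2.3"); err != nil {
		fmt.Println(err) // cannot parse float64 from "1.2.3"
	}
}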