From b03d9f553219009e31fc36319fc93ec77e9eee17 Mon Sep 17 00:00:00 2001 From: codebien <2103732+codebien@users.noreply.github.com> Date: Fri, 24 Jan 2025 18:27:01 +0100 Subject: [PATCH] Drop the import from the external extension --- go.mod | 7 +- vendor/github.com/golang/snappy/.gitignore | 16 + vendor/github.com/golang/snappy/AUTHORS | 18 + vendor/github.com/golang/snappy/CONTRIBUTORS | 41 + vendor/github.com/golang/snappy/LICENSE | 27 + vendor/github.com/golang/snappy/README | 107 +++ vendor/github.com/golang/snappy/decode.go | 264 +++++++ .../github.com/golang/snappy/decode_amd64.s | 490 ++++++++++++ .../github.com/golang/snappy/decode_arm64.s | 494 ++++++++++++ vendor/github.com/golang/snappy/decode_asm.go | 15 + .../github.com/golang/snappy/decode_other.go | 115 +++ vendor/github.com/golang/snappy/encode.go | 289 +++++++ .../github.com/golang/snappy/encode_amd64.s | 730 ++++++++++++++++++ .../github.com/golang/snappy/encode_arm64.s | 722 +++++++++++++++++ vendor/github.com/golang/snappy/encode_asm.go | 30 + .../github.com/golang/snappy/encode_other.go | 238 ++++++ vendor/github.com/golang/snappy/snappy.go | 98 +++ .../pkg/remotewrite/config.go | 451 ----------- .../pkg/remotewrite/prometheus.go | 52 -- .../pkg/remotewrite/remotewrite.go | 406 ---------- .../pkg/remotewrite/trend.go | 250 ------ vendor/modules.txt | 4 +- 22 files changed, 3701 insertions(+), 1163 deletions(-) create mode 100644 vendor/github.com/golang/snappy/.gitignore create mode 100644 vendor/github.com/golang/snappy/AUTHORS create mode 100644 vendor/github.com/golang/snappy/CONTRIBUTORS create mode 100644 vendor/github.com/golang/snappy/LICENSE create mode 100644 vendor/github.com/golang/snappy/README create mode 100644 vendor/github.com/golang/snappy/decode.go create mode 100644 vendor/github.com/golang/snappy/decode_amd64.s create mode 100644 vendor/github.com/golang/snappy/decode_arm64.s create mode 100644 vendor/github.com/golang/snappy/decode_asm.go create mode 100644 
vendor/github.com/golang/snappy/decode_other.go create mode 100644 vendor/github.com/golang/snappy/encode.go create mode 100644 vendor/github.com/golang/snappy/encode_amd64.s create mode 100644 vendor/github.com/golang/snappy/encode_arm64.s create mode 100644 vendor/github.com/golang/snappy/encode_asm.go create mode 100644 vendor/github.com/golang/snappy/encode_other.go create mode 100644 vendor/github.com/golang/snappy/snappy.go delete mode 100644 vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/config.go delete mode 100644 vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/prometheus.go delete mode 100644 vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/remotewrite.go delete mode 100644 vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/trend.go diff --git a/go.mod b/go.mod index 50056066644..403064a4ff7 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/fatih/color v1.18.0 github.com/go-sourcemap/sourcemap v2.1.4+incompatible github.com/golang/protobuf v1.5.4 + github.com/golang/snappy v0.0.4 github.com/gorilla/websocket v1.5.3 github.com/grafana/xk6-dashboard v0.7.5 github.com/grafana/xk6-output-opentelemetry v0.3.0 @@ -57,7 +58,7 @@ require ( require ( buf.build/gen/go/gogo/protobuf/protocolbuffers/go v1.31.0-20210810001428-4df00b267f94.1 // indirect - buf.build/gen/go/prometheus/prometheus/protocolbuffers/go v1.31.0-20230627135113-9a12bc2590d2.1 // indirect + buf.build/gen/go/prometheus/prometheus/protocolbuffers/go v1.31.0-20230627135113-9a12bc2590d2.1 github.com/andybalholm/cascadia v1.3.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bufbuild/protocompile v0.14.1 // indirect @@ -80,8 +81,8 @@ require ( github.com/nxadm/tail v1.4.11 // indirect github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.16.0 // indirect - 
github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/client_golang v1.16.0 + github.com/prometheus/client_model v0.4.0 github.com/prometheus/common v0.42.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect github.com/r3labs/sse/v2 v2.10.0 // indirect diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore new file mode 100644 index 00000000000..042091d9b3b --- /dev/null +++ b/vendor/github.com/golang/snappy/.gitignore @@ -0,0 +1,16 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. +testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS new file mode 100644 index 00000000000..52ccb5a934d --- /dev/null +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -0,0 +1,18 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization <email address> +# The email address is not required for organizations. + +# Please keep the list sorted. + +Amazon.com, Inc +Damian Gryski +Eric Buth +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Klaus Post +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS new file mode 100644 index 00000000000..ea6524ddd02 --- /dev/null +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -0,0 +1,41 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. 
+# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name <email address> + +# Please keep the list sorted. + +Alex Legg +Damian Gryski +Eric Buth +Jan Mercl <0xjnml@gmail.com> +Jonathan Swinney +Kai Backman +Klaus Post +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE new file mode 100644 index 00000000000..6050c10f4c8 --- /dev/null +++ b/vendor/github.com/golang/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. 
+ * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README new file mode 100644 index 00000000000..cea12879a0e --- /dev/null +++ b/vendor/github.com/golang/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." 
+ +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s 
gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go new file mode 100644 index 00000000000..23c6e26c6b9 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode.go @@ -0,0 +1,264 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. 
+func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. 
+func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +func (r *Reader) fill() error { + for r.i >= r.j { + if !r.readFull(r.buf[:4], true) { + return r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. 
+ n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return r.err + } + if !r.readFull(r.decoded[:n], false) { + return r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return r.err + } + } + + return nil +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + c := r.decoded[r.i] + r.i++ + return c, nil +} diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s new file mode 100644 index 00000000000..e6179f65e35 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -0,0 +1,490 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. 
+ + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. 
+ MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADDQ CX, SI + SUBQ $58, SI + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. 
+// ---------------------------------------- +// The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADDQ $5, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-5])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVLQZX -4(SI), DX + JMP doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA tagCopy4 + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. 
+ // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte <d-offset> and <d> patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from <d-offset> to <d> will repeat the pattern + // once, after which we can move <d> two bytes without moving <d-offset>: + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. 
+ // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. 
+ CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s new file mode 100644 index 00000000000..7a3ead17eac --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_arm64.s @@ -0,0 +1,494 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. 
// func decode(dst, src []byte) int
//
// decode decompresses the Snappy block in src into dst and stores 0 in the
// result slot on success, or 1 (decodeErrCodeCorrupt) if the input is
// malformed or does not fill dst exactly.
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
//	- R2	scratch
//	- R3	scratch
//	- R4	length or x
//	- R5	offset
//	- R6	&src[s]
//	- R7	&dst[d]
//	+ R8	dst_base
//	+ R9	dst_len
//	+ R10	dst_base + dst_len
//	+ R11	src_base
//	+ R12	src_len
//	+ R13	src_base + src_len
//	- R14	used by doCopy
//	- R15	used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
//
// Frame usage, as seen in callMemmove below: 8(RSP)..24(RSP) carry the three
// memmove arguments, and 32(RSP)..48(RSP) are the spill slots that R7, R6 and
// R4 are reloaded from after the CALL.
TEXT ·decode(SB), NOSPLIT, $56-56
	// Initialize R6, R7 and R8-R13.
	MOVD dst_base+0(FP), R8
	MOVD dst_len+8(FP), R9
	MOVD R8, R7
	MOVD R8, R10
	ADD R9, R10, R10
	MOVD src_base+24(FP), R11
	MOVD src_len+32(FP), R12
	MOVD R11, R6
	MOVD R11, R13
	ADD R12, R13, R13

loop:
	// for s < len(src)
	CMP R13, R6
	BEQ end

	// R4 = uint32(src[s])
	//
	// switch src[s] & 0x03
	//
	// The low two bits select the tag kind: anything >= 1 is a copy tag and
	// is dispatched further at tagCopy; 0 falls through to the literal code.
	MOVBU (R6), R4
	MOVW R4, R3
	ANDW $3, R3
	MOVW $1, R1
	CMPW R1, R3
	BGE tagCopy

	// ----------------------------------------
	// The code below handles literal tags.

	// case tagLiteral:
	// x := uint32(src[s] >> 2)
	// switch
	//
	// x >= 60 means the literal length is stored in the bytes that follow
	// the tag, which tagLit60Plus decodes.
	MOVW $60, R1
	LSRW $2, R4, R4
	CMPW R4, R1
	BLS tagLit60Plus

	// case x < 60:
	// s++
	ADD $1, R6, R6

doLit:
	// This is the end of the inner "switch", when we have a literal tag.
	//
	// We assume that R4 == x and x fits in a uint32, where x is the variable
	// used in the pure Go decode_other.go code.

	// length = int(x) + 1
	//
	// Unlike the pure Go code, we don't need to check if length <= 0 because
	// R4 can hold 64 bits, so the increment cannot overflow.
	ADD $1, R4, R4

	// Prepare to check if copying length bytes will run past the end of dst or
	// src.
	//
	// R2 = len(dst) - d
	// R3 = len(src) - s
	MOVD R10, R2
	SUB R7, R2, R2
	MOVD R13, R3
	SUB R6, R3, R3

	// !!! Try a faster technique for short (16 or fewer bytes) copies.
	//
	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
	//   goto callMemmove // Fall back on calling runtime·memmove.
	// }
	//
	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
	// against 21 instead of 16, because it cannot assume that all of its input
	// is contiguous in memory and so it needs to leave enough source bytes to
	// read the next tag without refilling buffers, but Go's Decode assumes
	// contiguousness (the src argument is a []byte).
	CMP $16, R4
	BGT callMemmove
	CMP $16, R2
	BLT callMemmove
	CMP $16, R3
	BLT callMemmove

	// !!! Implement the copy from src to dst as a 16-byte load and store.
	// (Decode's documentation says that dst and src must not overlap.)
	//
	// This always copies 16 bytes, instead of only length bytes, but that's
	// OK. If the input is a valid Snappy encoding then subsequent iterations
	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
	// non-nil error), so the overrun will be ignored.
	//
	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
	// 16-byte loads and stores. This technique probably wouldn't be as
	// effective on architectures that are fussier about alignment.
	LDP 0(R6), (R14, R15)
	STP (R14, R15), 0(R7)

	// d += length
	// s += length
	ADD R4, R7, R7
	ADD R4, R6, R6
	B loop

callMemmove:
	// if length > len(dst)-d || length > len(src)-s { etc }
	CMP R2, R4
	BGT errCorrupt
	CMP R3, R4
	BGT errCorrupt

	// copy(dst[d:], src[s:s+length])
	//
	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
	// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
	// three registers to the stack, to save local variables across the CALL.
	// The first three stores are the call arguments; the second three are
	// the spill copies that are reloaded once memmove returns.
	MOVD R7, 8(RSP)
	MOVD R6, 16(RSP)
	MOVD R4, 24(RSP)
	MOVD R7, 32(RSP)
	MOVD R6, 40(RSP)
	MOVD R4, 48(RSP)
	CALL runtime·memmove(SB)

	// Restore local variables: unspill registers from the stack and
	// re-calculate R8-R13.
	MOVD 32(RSP), R7
	MOVD 40(RSP), R6
	MOVD 48(RSP), R4
	MOVD dst_base+0(FP), R8
	MOVD dst_len+8(FP), R9
	MOVD R8, R10
	ADD R9, R10, R10
	MOVD src_base+24(FP), R11
	MOVD src_len+32(FP), R12
	MOVD R11, R13
	ADD R12, R13, R13

	// d += length
	// s += length
	ADD R4, R7, R7
	ADD R4, R6, R6
	B loop

tagLit60Plus:
	// !!! This fragment does the
	//
	// s += x - 58; if uint(s) > uint(len(src)) { etc }
	//
	// checks. In the asm version, we code it once instead of once per switch case.
	// (x - 58 is 2, 3, 4 or 5 for x == 60, 61, 62 or 63: the tag byte plus
	// one to four length bytes.)
	ADD R4, R6, R6
	SUB $58, R6, R6
	MOVD R6, R3
	SUB R11, R3, R3
	CMP R12, R3
	BGT errCorrupt

	// case x == 60:
	//
	// A single compare against 61 splits the remaining cases three ways:
	// equal goes to tagLit61, greater goes to tagLit62Plus, and less (which
	// can only be x == 60 here) falls through.
	MOVW $61, R1
	CMPW R1, R4
	BEQ tagLit61
	BGT tagLit62Plus

	// x = uint32(src[s-1])
	MOVBU -1(R6), R4
	B doLit

tagLit61:
	// case x == 61:
	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
	MOVHU -2(R6), R4
	B doLit

tagLit62Plus:
	CMPW $62, R4
	BHI tagLit63

	// case x == 62:
	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
	//
	// The 24-bit value is assembled from a 16-bit load plus a byte load.
	MOVHU -3(R6), R4
	MOVBU -1(R6), R3
	ORR R3<<16, R4
	B doLit

tagLit63:
	// case x == 63:
	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
	MOVWU -4(R6), R4
	B doLit

	// The code above handles literal tags.
	// ----------------------------------------
	// The code below handles copy tags.

tagCopy4:
	// case tagCopy4:
	// s += 5
	ADD $5, R6, R6

	// if uint(s) > uint(len(src)) { etc }
	MOVD R6, R3
	SUB R11, R3, R3
	CMP R12, R3
	BGT errCorrupt

	// length = 1 + int(src[s-5])>>2
	MOVD $1, R1
	ADD R4>>2, R1, R4

	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
	MOVWU -4(R6), R5
	B doCopy

tagCopy2:
	// case tagCopy2:
	// s += 3
	ADD $3, R6, R6

	// if uint(s) > uint(len(src)) { etc }
	MOVD R6, R3
	SUB R11, R3, R3
	CMP R12, R3
	BGT errCorrupt

	// length = 1 + int(src[s-3])>>2
	MOVD $1, R1
	ADD R4>>2, R1, R4

	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
	MOVHU -2(R6), R5
	B doCopy

tagCopy:
	// We have a copy tag. We assume that:
	//	- R3 == src[s] & 0x03
	//	- R4 == src[s]
	//
	// R3 is 1, 2 or 3 here: 2 selects tagCopy2, 3 selects tagCopy4, and 1
	// falls through to the tagCopy1 code.
	CMP $2, R3
	BEQ tagCopy2
	BGT tagCopy4

	// case tagCopy1:
	// s += 2
	ADD $2, R6, R6

	// if uint(s) > uint(len(src)) { etc }
	MOVD R6, R3
	SUB R11, R3, R3
	CMP R12, R3
	BGT errCorrupt

	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
	MOVD R4, R5
	AND $0xe0, R5
	MOVBU -1(R6), R3
	ORR R5<<3, R3, R5

	// length = 4 + int(src[s-2])>>2&0x7
	MOVD $7, R1
	AND R4>>2, R1, R4
	ADD $4, R4, R4

doCopy:
	// This is the end of the outer "switch", when we have a copy tag.
	//
	// We assume that:
	//	- R4 == length && R4 > 0
	//	- R5 == offset

	// if offset <= 0 { etc }
	MOVD $0, R1
	CMP R1, R5
	BLE errCorrupt

	// if d < offset { etc }
	MOVD R7, R3
	SUB R8, R3, R3
	CMP R5, R3
	BLT errCorrupt

	// if length > len(dst)-d { etc }
	MOVD R10, R3
	SUB R7, R3, R3
	CMP R3, R4
	BGT errCorrupt

	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
	//
	// Set:
	//	- R14 = len(dst)-d
	//	- R15 = &dst[d-offset]
	MOVD R10, R14
	SUB R7, R14, R14
	MOVD R7, R15
	SUB R5, R15, R15

	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
	//
	// First, try using two 8-byte load/stores, similar to the doLit technique
	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
	// and not one 16-byte load/store, and the first store has to be before the
	// second load, due to the overlap if offset is in the range [8, 16).
	//
	// if length > 16 || offset < 8 || len(dst)-d < 16 {
	//   goto slowForwardCopy
	// }
	// copy 16 bytes
	// d += length
	CMP $16, R4
	BGT slowForwardCopy
	CMP $8, R5
	BLT slowForwardCopy
	CMP $16, R14
	BLT slowForwardCopy
	MOVD 0(R15), R2
	MOVD R2, 0(R7)
	MOVD 8(R15), R3
	MOVD R3, 8(R7)
	ADD R4, R7, R7
	B loop

slowForwardCopy:
	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
	// can still try 8-byte load stores, provided we can overrun up to 10 extra
	// bytes. As above, the overrun will be fixed up by subsequent iterations
	// of the outermost loop.
	//
	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
	// commentary says:
	//
	// ----
	//
	// The main part of this loop is a simple copy of eight bytes at a time
	// until we've copied (at least) the requested amount of bytes.  However,
	// if d and d-offset are less than eight bytes apart (indicating a
	// repeating pattern of length < 8), we first need to expand the pattern in
	// order to get the correct results. For instance, if the buffer looks like
	// this, with the eight-byte "from" and "to" patterns marked as
	// intervals:
	//
	//    abxxxxxxxxxxxx
	//    [------]           d-offset
	//      [------]         d
	//
	// a single eight-byte copy from "from" to "to" will repeat the pattern
	// once, after which we can move "to" two bytes without moving "from":
	//
	//    ababxxxxxxxxxx
	//    [------]           d-offset
	//        [------]       d
	//
	// and repeat the exercise until the two no longer overlap.
	//
	// This allows us to do very well in the special case of one single byte
	// repeated many times, without taking a big hit for more general cases.
	//
	// The worst case of extra writing past the end of the match occurs when
	// offset == 1 and length == 1; the last copy will read from byte positions
	// [0..7] and write to [4..11], whereas it was only supposed to write to
	// position 1. Thus, ten excess bytes.
	//
	// ----
	//
	// That "10 byte overrun" worst case is confirmed by Go's
	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
	// and finishSlowForwardCopy algorithm.
	//
	// if length > len(dst)-d-10 {
	//   goto verySlowForwardCopy
	// }
	SUB $10, R14, R14
	CMP R14, R4
	BGT verySlowForwardCopy

makeOffsetAtLeast8:
	// !!! As above, expand the pattern so that offset >= 8 and we can use
	// 8-byte load/stores.
	//
	// for offset < 8 {
	//   copy 8 bytes from dst[d-offset:] to dst[d:]
	//   length -= offset
	//   d      += offset
	//   offset += offset
	//   // The two previous lines together means that d-offset, and therefore
	//   // R15, is unchanged.
	// }
	CMP $8, R5
	BGE fixUpSlowForwardCopy
	MOVD (R15), R3
	MOVD R3, (R7)
	SUB R5, R4, R4
	ADD R5, R7, R7
	ADD R5, R5, R5
	B makeOffsetAtLeast8

fixUpSlowForwardCopy:
	// !!! Add length (which might be negative now) to d (implied by R7 being
	// &dst[d]) so that d ends up at the right place when we jump back to the
	// top of the loop. Before we do that, though, we save R7 to R2 so that, if
	// length is positive, copying the remaining length bytes will write to the
	// right place.
	MOVD R7, R2
	ADD R4, R7, R7

finishSlowForwardCopy:
	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
	// length means that we overrun, but as above, that will be fixed up by
	// subsequent iterations of the outermost loop.
	MOVD $0, R1
	CMP R1, R4
	BLE loop
	MOVD (R15), R3
	MOVD R3, (R2)
	ADD $8, R15, R15
	ADD $8, R2, R2
	SUB $8, R4, R4
	B finishSlowForwardCopy

verySlowForwardCopy:
	// verySlowForwardCopy is a simple implementation of forward copy. In C
	// parlance, this is a do/while loop instead of a while loop, since we know
	// that length > 0. In Go syntax:
	//
	// for {
	//   dst[d] = dst[d - offset]
	//   d++
	//   length--
	//   if length == 0 {
	//     break
	//   }
	// }
	MOVB (R15), R3
	MOVB R3, (R7)
	ADD $1, R15, R15
	ADD $1, R7, R7
	SUB $1, R4, R4
	CBNZ R4, verySlowForwardCopy
	B loop

	// The code above handles copy tags.
	// ----------------------------------------

end:
	// This is the end of the "for s < len(src)".
	//
	// if d != len(dst) { etc }
	CMP R10, R7
	BNE errCorrupt

	// return 0
	MOVD $0, ret+48(FP)
	RET

errCorrupt:
	// return decodeErrCodeCorrupt
	MOVD $1, R2
	MOVD R2, ret+48(FP)
	RET
+ +// +build !amd64,!arm64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
+ return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset >= length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] + } + d += length + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go new file mode 100644 index 00000000000..7f23657076c --- /dev/null +++ b/vendor/github.com/golang/snappy/encode.go @@ -0,0 +1,289 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. 
It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. 
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
	// Neither the input length nor the bound may exceed 32 bits. A negative
	// srcLen converts to a huge uint64 and is rejected by the same check.
	const limit = 0xffffffff

	size := uint64(srcLen)
	if size > limit {
		return -1
	}

	// Compressed data can be defined as:
	//    compressed := item* literal*
	//    item       := literal* copy
	//
	// The trailing literal sequence has a space blowup of at most 62/60
	// since a literal of length 60 needs one tag byte + one extra byte
	// for length information.
	//
	// Item blowup is trickier to measure. Suppose the "copy" op copies
	// 4 bytes of data. Because of a special check in the encoding code,
	// we produce a 4-byte copy only if the offset is < 65536. Therefore
	// the copy op takes 3 bytes to encode, and this type of item leads
	// to at most the 62/60 blowup for representing literals.
	//
	// Suppose the "copy" op copies 5 bytes of data. If the offset is big
	// enough, it will take 5 bytes to encode the copy op. Therefore the
	// worst case here is a one-byte literal followed by a five-byte copy.
	// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
	//
	// This last factor dominates the blowup, so the final estimate is:
	worst := 32 + size + size/6
	if worst > limit {
		return -1
	}
	return int(worst)
}
There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. 
+func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. + compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. 
+ w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s new file mode 100644 index 00000000000..adfd979fe27 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. 
// func emitLiteral(dst, lit []byte) int
//
// emitLiteral writes a literal tag (1, 2 or 3 header bytes) followed by the
// bytes of lit into dst, and returns the number of bytes written.
//
// All local variables fit into registers. The register allocation:
//	- AX	len(lit)
//	- BX	n
//	- DX	return value
//	- DI	&dst[i]
//	- R10	&lit[0]
//
// The 24 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
	MOVQ dst_base+0(FP), DI
	MOVQ lit_base+24(FP), R10
	MOVQ lit_len+32(FP), AX
	// DX starts as len(lit); each branch below adds its header size to it.
	MOVQ AX, DX
	// n = len(lit) - 1, the value stored in the literal header.
	MOVL AX, BX
	SUBL $1, BX

	// Pick the shortest header that can hold n.
	CMPL BX, $60
	JLT oneByte
	CMPL BX, $256
	JLT twoBytes

threeBytes:
	// Tag 0xf4 is 61<<2 with literal tag bits: n follows as 2 bytes.
	// Only 16 bits of n are stored, so this presumably relies on callers
	// never passing more than 65536 bytes — TODO confirm against maxBlockSize.
	MOVB $0xf4, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	ADDQ $3, DX
	JMP memmove

twoBytes:
	// Tag 0xf0 is 60<<2 with literal tag bits: n follows as 1 byte.
	MOVB $0xf0, 0(DI)
	MOVB BX, 1(DI)
	ADDQ $2, DI
	ADDQ $2, DX
	JMP memmove

oneByte:
	// n < 60 fits in the tag byte itself: n<<2 with literal tag bits.
	SHLB $2, BX
	MOVB BX, 0(DI)
	ADDQ $1, DI
	ADDQ $1, DX

memmove:
	// Store the result before the CALL below.
	MOVQ DX, ret+48(FP)

	// copy(dst[i:], lit)
	//
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
	// DI, R10 and AX as arguments.
	MOVQ DI, 0(SP)
	MOVQ R10, 8(SP)
	MOVQ AX, 16(SP)
	CALL runtime·memmove(SB)
	RET

// ----------------------------------------------------------------------------

// func emitCopy(dst []byte, offset, length int) int
//
// emitCopy writes one or more copy tags describing (offset, length) into dst
// and returns the number of bytes written.
//
// All local variables fit into registers. The register allocation:
//	- AX	length
//	- SI	&dst[0]
//	- DI	&dst[i]
//	- R11	offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), DI
	MOVQ DI, SI
	MOVQ offset+24(FP), R11
	MOVQ length+32(FP), AX

loop0:
	// for length >= 68 { etc }
	CMPL AX, $68
	JLT step1

	// Emit a length 64 copy, encoded as 3 bytes.
	// 0xfe is (64-1)<<2 | 2; the 16-bit offset follows the tag.
	MOVB $0xfe, 0(DI)
	MOVW R11, 1(DI)
	ADDQ $3, DI
	SUBL $64, AX
	JMP loop0

step1:
	// if length > 64 { etc }
	CMPL AX, $64
	JLE step2

	// Emit a length 60 copy, encoded as 3 bytes.
	// 0xee is (60-1)<<2 | 2. Taking 60 rather than 64 here leaves 5..7 bytes
	// for the final copy, and the 2-byte form below stores length-4.
	MOVB $0xee, 0(DI)
	MOVW R11, 1(DI)
	ADDQ $3, DI
	SUBL $60, AX

step2:
	// if length >= 12 || offset >= 2048 { goto step3 }
	CMPL AX, $12
	JGE step3
	CMPL R11, $2048
	JGE step3

	// Emit the remaining copy, encoded as 2 bytes.
	// The second byte is the low 8 bits of offset. The tag byte is
	// (offset>>8)<<5 | (length-4)<<2 | 1.
	MOVB R11, 1(DI)
	SHRL $8, R11
	SHLB $5, R11
	SUBB $4, AX
	SHLB $2, AX
	ORB AX, R11
	ORB $1, R11
	MOVB R11, 0(DI)
	ADDQ $2, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

step3:
	// Emit the remaining copy, encoded as 3 bytes.
	// The tag byte is (length-1)<<2 | 2; the 16-bit offset follows.
	SUBL $1, AX
	SHLB $2, AX
	ORB $2, AX
	MOVB AX, 0(DI)
	MOVW R11, 1(DI)
	ADDQ $3, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

// ----------------------------------------------------------------------------

// func extendMatch(src []byte, i, j int) int
//
// extendMatch compares src[i:] against src[j:] and returns the index just
// past the last byte of src[j:] that still matches, stopping at len(src).
//
// All local variables fit into registers. The register allocation:
//	- DX	&src[0]
//	- SI	&src[j]
//	- R13	&src[len(src) - 8]
//	- R14	&src[len(src)]
//	- R15	&src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
	MOVQ src_base+0(FP), DX
	MOVQ src_len+8(FP), R14
	MOVQ i+24(FP), R15
	MOVQ j+32(FP), SI
	// Turn the length and the two indexes into absolute pointers.
	ADDQ DX, R14
	ADDQ DX, R15
	ADDQ DX, SI
	MOVQ R14, R13
	SUBQ $8, R13

cmp8:
	// As long as we are 8 or more bytes before the end of src, we can load and
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
	CMPQ SI, R13
	JA cmp1
	MOVQ (R15), AX
	MOVQ (SI), BX
	CMPQ AX, BX
	JNE bsf
	ADDQ $8, R15
	ADDQ $8, SI
	JMP cmp8

bsf:
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
	// the index of the first byte that differs. The BSF instruction finds the
	// least significant 1 bit, the amd64 architecture is little-endian, and
	// the shift by 3 converts a bit index to a byte index.
	XORQ AX, BX
	BSFQ BX, BX
	SHRQ $3, BX
	ADDQ BX, SI

	// Convert from &src[ret] to ret.
	SUBQ DX, SI
	MOVQ SI, ret+40(FP)
	RET

cmp1:
	// In src's tail, compare 1 byte at a time.
	CMPQ SI, R14
	JAE extendMatchEnd
	MOVB (R15), AX
	MOVB (SI), BX
	CMPB AX, BX
	JNE extendMatchEnd
	ADDQ $1, R15
	ADDQ $1, SI
	JMP cmp1

extendMatchEnd:
	// Convert from &src[ret] to ret.
	SUBQ DX, SI
	MOVQ SI, ret+40(FP)
	RET
&src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. 
+ MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. 
+ // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! 
offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. 
+ MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. 
+ + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". 
+ ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s new file mode 100644 index 00000000000..f8d54adfc5c --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -0,0 +1,722 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - R3 len(lit) +// - R4 n +// - R6 return value +// - R8 &dst[i] +// - R10 &lit[0] +// +// The 32 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $32-56 + MOVD dst_base+0(FP), R8 + MOVD lit_base+24(FP), R10 + MOVD lit_len+32(FP), R3 + MOVD R3, R6 + MOVW R3, R4 + SUBW $1, R4, R4 + + CMPW $60, R4 + BLT oneByte + CMPW $256, R4 + BLT twoBytes + +threeBytes: + MOVD $0xf4, R2 + MOVB R2, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + ADD $3, R6, R6 + B memmove + +twoBytes: + MOVD $0xf0, R2 + MOVB R2, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + ADD $2, R6, R6 + B memmove + +oneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + ADD $1, R6, R6 + +memmove: + MOVD R6, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. 
+ MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - R3 length +// - R7 &dst[0] +// - R8 &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVD dst_base+0(FP), R8 + MOVD R8, R7 + MOVD offset+24(FP), R11 + MOVD length+32(FP), R3 + +loop0: + // for length >= 68 { etc } + CMPW $68, R3 + BLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $64, R3, R3 + B loop0 + +step1: + // if length > 64 { etc } + CMP $64, R3 + BLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $60, R3, R3 + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMP $12, R3 + BGE step3 + CMPW $2048, R11 + BGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $3, R11, R11 + AND $0xe0, R11, R11 + SUB $4, R3, R3 + LSLW $2, R3 + AND $0xff, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUB $1, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + + // Return the number of bytes written. 
+ SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - R6 &src[0] +// - R7 &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVD src_base+0(FP), R6 + MOVD src_len+8(FP), R14 + MOVD i+24(FP), R15 + MOVD j+32(FP), R7 + ADD R6, R14, R14 + ADD R6, R15, R15 + ADD R6, R7, R7 + MOVD R14, R13 + SUB $8, R13, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI cmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE bsf + ADD $8, R15, R15 + ADD $8, R7, R7 + B cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS extendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE extendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. 
+ SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - R3 . . +// - R4 . . +// - R5 64 shift +// - R6 72 &src[0], tableSize +// - R7 80 &src[s] +// - R8 88 &dst[d] +// - R9 96 sLimit +// - R10 . &src[nextEmit] +// - R11 104 prevHash, currHash, nextHash, offset +// - R12 112 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 120 candidate +// - R16 . hash constant, 0x1e35a7bd +// - R17 . &table +// - . 128 table +// +// The second column (64, 72, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. +TEXT ·encodeBlock(SB), 0, $32896-56 + MOVD dst_base+0(FP), R8 + MOVD src_base+24(FP), R7 + MOVD src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVD $24, R5 + MOVD $256, R6 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + MOVD $16384, R2 + CMP R2, R6 + BGE varTable + CMP R14, R6 + BGE varTable + SUB $1, R5, R5 + LSL $1, R6, R6 + B calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. 
Each uint16 element is 2 bytes and each + // iteration writes 64 bytes, so we can do only tableSize/32 writes + // instead of the 2048 writes that would zero-initialize all of table's + // 32768 bytes. This clear could overrun the first tableSize elements, but + // it won't overrun the allocated stack size. + ADD $128, RSP, R17 + MOVD R17, R4 + + // !!! R6 = &table[tableSize] + ADD R6<<1, R17, R6 + +memclr: + STP.P (ZR, ZR), 64(R4) + STP (ZR, ZR), -48(R4) + STP (ZR, ZR), -32(R4) + STP (ZR, ZR), -16(R4) + CMP R4, R6 + BHI memclr + + // !!! R6 = &src[0] + MOVD R7, R6 + + // sLimit := len(src) - inputMargin + MOVD R14, R9 + SUB $15, R9, R9 + + // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't + // change for the rest of the function. + MOVD R5, 64(RSP) + MOVD R6, 72(RSP) + MOVD R9, 96(RSP) + + // nextEmit := 0 + MOVD R6, R10 + + // s := 1 + ADD $1, R7, R7 + + // nextHash := hash(load32(src, s), shift) + MOVW 0(R7), R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + +outer: + // for { etc } + + // skip := 32 + MOVD $32, R12 + + // nextS := s + MOVD R7, R13 + + // candidate := 0 + MOVD $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVD R13, R7 + + // bytesBetweenHashLookups := skip >> 5 + MOVD R12, R14 + LSR $5, R14, R14 + + // nextS = s + bytesBetweenHashLookups + ADD R14, R13, R13 + + // skip += bytesBetweenHashLookups + ADD R14, R12, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVD R13, R3 + SUB R6, R3, R3 + CMP R9, R3 + BHI emitRemainder + + // candidate = int(table[nextHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[nextHash] = uint16(s) + MOVD R7, R3 + SUB R6, R3, R3 + + MOVH R3, 0(R17)(R11<<1) + + // nextHash = hash(load32(src, nextS), shift) + MOVW 0(R13), R11 + MULW R16, R11 + LSRW R5, R11, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVW 0(R7), R3 + MOVW (R6)(R15), R4 + CMPW R4, R3 + BNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found.
We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVD R7, R3 + SUB R10, R3, R3 + CMP $16, R3 + BLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVW R3, R4 + SUBW $1, R4, R4 + + MOVW $60, R2 + CMPW R2, R4 + BLT inlineEmitLiteralOneByte + MOVW $256, R2 + CMPW R2, R4 + BLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVD $0xf4, R1 + MOVB R1, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVD $0xf0, R1 + MOVB R1, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADD R3, R8, R8 + MOVD R7, 80(RSP) + MOVD R8, 88(RSP) + MOVD R15, 120(RSP) + CALL runtime·memmove(SB) + MOVD 64(RSP), R5 + MOVD 72(RSP), R6 + MOVD 80(RSP), R7 + MOVD 88(RSP), R8 + MOVD 96(RSP), R9 + MOVD 120(RSP), R15 + ADD $128, RSP, R17 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + B inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB R3, R4 + SUBW $1, R4, R4 + AND $0xff, R4, R4 + LSLW $2, R4, R4 + MOVB R4, (R8) + ADD $1, R8, R8 + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) 
+ // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R10), (R0, R1) + STP (R0, R1), 0(R8) + ADD R3, R8, R8 + +inner1: + // for { etc } + + // base := s + MOVD R7, R12 + + // !!! offset := base - candidate + MOVD R12, R11 + SUB R15, R11, R11 + SUB R6, R11, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVD src_len+32(FP), R14 + ADD R6, R14, R14 + + // !!! R13 = &src[len(src) - 8] + MOVD R14, R13 + SUB $8, R13, R13 + + // !!! R15 = &src[candidate + 4] + ADD $4, R15, R15 + ADD R6, R15, R15 + + // !!! s += 4 + ADD $4, R7, R7 + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI inlineExtendMatchCmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchBSF + ADD $8, R15, R15 + ADD $8, R7, R7 + B inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + B inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. 
+ CMP R7, R14 + BLS inlineExtendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVD R7, R3 + SUB R12, R3, R3 + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + MOVW $68, R2 + CMPW R2, R3 + BLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $64, R3, R3 + B inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + MOVW $64, R2 + CMPW R2, R3 + BLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $60, R3, R3 + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + MOVW $12, R2 + CMPW R2, R3 + BGE inlineEmitCopyStep3 + MOVW $2048, R2 + CMPW R2, R11 + BGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $8, R11, R11 + LSLW $5, R11, R11 + SUBW $4, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + B inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBW $1, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVD R7, R10 + + // if s >= sLimit { goto emitRemainder } + MOVD R7, R3 + SUB R6, R3, R3 + CMP R3, R9 + BLS emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. 
+ + // x := load64(src, s-1) + MOVD -1(R7), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // table[prevHash] = uint16(s-1) + MOVD R7, R3 + SUB R6, R3, R3 + SUB $1, R3, R3 + + MOVHU R3, 0(R17)(R11<<1) + + // currHash := hash(uint32(x>>8), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // candidate = int(table[currHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[currHash] = uint16(s) + ADD $1, R3, R3 + MOVHU R3, 0(R17)(R11<<1) + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVW (R6)(R15), R4 + CMPW R4, R14 + BEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // s++ + ADD $1, R7, R7 + + // break out of the inner1 for loop, i.e. continue the outer loop. + B outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVD src_len+32(FP), R3 + ADD R6, R3, R3 + CMP R3, R10 + BEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVD R8, 8(RSP) + MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD R10, 32(RSP) + SUB R10, R3, R3 + MOVD R3, 40(RSP) + MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVD R8, 88(RSP) + CALL ·emitLiteral(SB) + MOVD 88(RSP), R8 + + // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVD 56(RSP), R1 + ADD R1, R8, R8 + +encodeBlockEnd: + MOVD dst_base+0(FP), R3 + SUB R3, R8, R8 + MOVD R8, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_asm.go b/vendor/github.com/golang/snappy/encode_asm.go new file mode 100644 index 00000000000..107c1e71418 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_asm.go @@ -0,0 +1,30 @@ +// Copyright 2016 The Snappy-Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm +// +build amd64 arm64 + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go new file mode 100644 index 00000000000..296d7f0beb0 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. 
+// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes.
+ dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. 
The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. 
+ d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. + // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. 
+ x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go new file mode 100644 index 00000000000..ece692ea461 --- /dev/null +++ b/vendor/github.com/golang/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. +// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy // import "github.com/golang/snappy" + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. 
The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". 
+ maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/config.go b/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/config.go deleted file mode 100644 index ffa62c4e9ba..00000000000 --- a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/config.go +++ /dev/null @@ -1,451 +0,0 @@ -package remotewrite - -import ( - "crypto/tls" - "encoding/json" - "errors" - "fmt" - "net/http" - "strconv" - "strings" - "time" - - "github.com/grafana/xk6-output-prometheus-remote/pkg/sigv4" - - "github.com/grafana/xk6-output-prometheus-remote/pkg/remote" - "go.k6.io/k6/lib/types" - "gopkg.in/guregu/null.v3" -) - -const ( - defaultServerURL = "http://localhost:9090/api/v1/write" - defaultTimeout = 5 * time.Second - defaultPushInterval = 5 * time.Second - defaultMetricPrefix = "k6_" -) - -//nolint:gochecknoglobals -var defaultTrendStats = []string{"p(99)"} - -// Config contains the configuration for the Output. -type Config struct { - // ServerURL contains the absolute ServerURL for the Write endpoint where to flush the time series. 
- ServerURL null.String `json:"url"` - - // Headers contains additional headers that should be included in the HTTP requests. - Headers map[string]string `json:"headers"` - - // InsecureSkipTLSVerify skips TLS client side checks. - InsecureSkipTLSVerify null.Bool `json:"insecureSkipTLSVerify"` - - // Username is the User for Basic Auth. - Username null.String `json:"username"` - - // Password is the Password for the Basic Auth. - Password null.String `json:"password"` - - // ClientCertificate is the public key of the SSL certificate. - // It is expected the path of the certificate on the file system. - // If it is required a dedicated Certifacate Authority then it should be added - // to the conventional folders defined by the operating system's registry. - ClientCertificate null.String `json:"clientCertificate"` - - // ClientCertificateKey is the private key of the SSL certificate. - // It is expected the path of the certificate on the file system. - ClientCertificateKey null.String `json:"clientCertificateKey"` - - // BearerToken if set is the token used for the `Authorization` header. - BearerToken null.String `json:"bearerToken"` - - // PushInterval defines the time between flushes. The Output will wait the set time - // before push a new set of time series to the endpoint. - PushInterval types.NullDuration `json:"pushInterval"` - - // TrendAsNativeHistogram defines if the mapping for metrics defined as Trend type - // should map to a Prometheus' Native Histogram. - TrendAsNativeHistogram null.Bool `json:"trendAsNativeHistogram"` - - // TrendStats defines the stats to flush for Trend metrics. - // - // TODO: should we support K6_SUMMARY_TREND_STATS? - TrendStats []string `json:"trendStats"` - - StaleMarkers null.Bool `json:"staleMarkers"` - - // SigV4Region is the AWS region where the workspace is. - SigV4Region null.String `json:"sigV4Region"` - - // SigV4AccessKey is the AWS access key. 
- SigV4AccessKey null.String `json:"sigV4AccessKey"` - - // SigV4SecretKey is the AWS secret key. - SigV4SecretKey null.String `json:"sigV4SecretKey"` -} - -// NewConfig creates an Output's configuration. -func NewConfig() Config { - return Config{ - ServerURL: null.StringFrom(defaultServerURL), - InsecureSkipTLSVerify: null.BoolFrom(false), - Username: null.NewString("", false), - Password: null.NewString("", false), - PushInterval: types.NullDurationFrom(defaultPushInterval), - Headers: make(map[string]string), - TrendStats: defaultTrendStats, - StaleMarkers: null.BoolFrom(false), - SigV4Region: null.NewString("", false), - SigV4AccessKey: null.NewString("", false), - SigV4SecretKey: null.NewString("", false), - } -} - -// RemoteConfig creates a configuration for the HTTP Remote-write client. -func (conf Config) RemoteConfig() (*remote.HTTPConfig, error) { - hc := remote.HTTPConfig{ - Timeout: defaultTimeout, - } - - // if at least valid user was configured, use basic auth - if conf.Username.Valid { - hc.BasicAuth = &remote.BasicAuth{ - Username: conf.Username.String, - Password: conf.Password.String, - } - } - - hc.TLSConfig = &tls.Config{ - InsecureSkipVerify: conf.InsecureSkipTLSVerify.Bool, //nolint:gosec - } - - if conf.ClientCertificate.Valid && conf.ClientCertificateKey.Valid { - cert, err := tls.LoadX509KeyPair(conf.ClientCertificate.String, conf.ClientCertificateKey.String) - if err != nil { - return nil, fmt.Errorf("failed to load the TLS certificate: %w", err) - } - hc.TLSConfig.Certificates = []tls.Certificate{cert} - } - - if isSigV4PartiallyConfigured(conf.SigV4Region, conf.SigV4AccessKey, conf.SigV4SecretKey) { - return nil, errors.New( - "sigv4 seems to be partially configured. All of " + - "K6_PROMETHEUS_RW_SIGV4_REGION, K6_PROMETHEUS_RW_SIGV4_ACCESS_KEY, K6_PROMETHEUS_RW_SIGV4_SECRET_KEY " + - "must all be set. 
Unset all to bypass sigv4", - ) - } - - if conf.SigV4Region.Valid && conf.SigV4AccessKey.Valid && conf.SigV4SecretKey.Valid { - hc.SigV4 = &sigv4.Config{ - Region: conf.SigV4Region.String, - AwsAccessKeyID: conf.SigV4AccessKey.String, - AwsSecretAccessKey: conf.SigV4SecretKey.String, - } - } - - if len(conf.Headers) > 0 { - hc.Headers = make(http.Header) - for k, v := range conf.Headers { - hc.Headers.Add(k, v) - } - } - - if conf.BearerToken.String != "" { - if hc.Headers == nil { - hc.Headers = make(http.Header) - } - hc.Headers.Set("Authorization", "Bearer "+conf.BearerToken.String) - } - - return &hc, nil -} - -// Apply merges applied Config into base. -func (conf Config) Apply(applied Config) Config { - if applied.ServerURL.Valid { - conf.ServerURL = applied.ServerURL - } - - if applied.InsecureSkipTLSVerify.Valid { - conf.InsecureSkipTLSVerify = applied.InsecureSkipTLSVerify - } - - if applied.Username.Valid { - conf.Username = applied.Username - } - - if applied.Password.Valid { - conf.Password = applied.Password - } - - if applied.BearerToken.Valid { - conf.BearerToken = applied.BearerToken - } - - if applied.SigV4Region.Valid { - conf.SigV4Region = applied.SigV4Region - } - - if applied.SigV4AccessKey.Valid { - conf.SigV4AccessKey = applied.SigV4AccessKey - } - - if applied.SigV4SecretKey.Valid { - conf.SigV4SecretKey = applied.SigV4SecretKey - } - - if applied.PushInterval.Valid { - conf.PushInterval = applied.PushInterval - } - - if applied.TrendAsNativeHistogram.Valid { - conf.TrendAsNativeHistogram = applied.TrendAsNativeHistogram - } - - if applied.StaleMarkers.Valid { - conf.StaleMarkers = applied.StaleMarkers - } - - if len(applied.Headers) > 0 { - for k, v := range applied.Headers { - conf.Headers[k] = v - } - } - - if len(applied.TrendStats) > 0 { - conf.TrendStats = make([]string, len(applied.TrendStats)) - copy(conf.TrendStats, applied.TrendStats) - } - - if applied.ClientCertificate.Valid { - conf.ClientCertificate = applied.ClientCertificate - 
} - - if applied.ClientCertificateKey.Valid { - conf.ClientCertificateKey = applied.ClientCertificateKey - } - - return conf -} - -// GetConsolidatedConfig combines the options' values from the different sources -// and returns the merged options. The Order of precedence used is documented -// in the k6 Documentation https://k6.io/docs/using-k6/k6-options/how-to/#order-of-precedence. -func GetConsolidatedConfig(jsonRawConf json.RawMessage, env map[string]string, _ string) (Config, error) { - result := NewConfig() - if jsonRawConf != nil { - jsonConf, err := parseJSON(jsonRawConf) - if err != nil { - return result, fmt.Errorf("parse JSON options failed: %w", err) - } - result = result.Apply(jsonConf) - } - - if len(env) > 0 { - envConf, err := parseEnvs(env) - if err != nil { - return result, fmt.Errorf("parse environment variables options failed: %w", err) - } - result = result.Apply(envConf) - } - - // TODO: define a way for defining Output's options - // then support them. - // url is the third GetConsolidatedConfig's argument which is omitted for now - //nolint:gocritic - // - //if url != "" { - //urlConf, err := parseArg(url) - //if err != nil { - //return result, fmt.Errorf("parse argument string options failed: %w", err) - //} - //result = result.Apply(urlConf) - //} - - return result, nil -} - -func envBool(env map[string]string, name string) (null.Bool, error) { - if v, vDefined := env[name]; vDefined { - b, err := strconv.ParseBool(v) - if err != nil { - return null.NewBool(false, false), err - } - - return null.BoolFrom(b), nil - } - return null.NewBool(false, false), nil -} - -func envMap(env map[string]string, prefix string) map[string]string { - result := make(map[string]string) - for ek, ev := range env { - if strings.HasPrefix(ek, prefix) { - k := strings.TrimPrefix(ek, prefix) - result[k] = ev - } - } - return result -} - -// TODO: try to migrate to github.com/mstoykov/envconfig like it's done on other projects? 
-func parseEnvs(env map[string]string) (Config, error) { //nolint:funlen - c := Config{ - Headers: make(map[string]string), - } - - if pushInterval, pushIntervalDefined := env["K6_PROMETHEUS_RW_PUSH_INTERVAL"]; pushIntervalDefined { - if err := c.PushInterval.UnmarshalText([]byte(pushInterval)); err != nil { - return c, err - } - } - - if url, urlDefined := env["K6_PROMETHEUS_RW_SERVER_URL"]; urlDefined { - c.ServerURL = null.StringFrom(url) - } - - if b, err := envBool(env, "K6_PROMETHEUS_RW_INSECURE_SKIP_TLS_VERIFY"); err != nil { - return c, err - } else if b.Valid { - c.InsecureSkipTLSVerify = b - } - - if user, userDefined := env["K6_PROMETHEUS_RW_USERNAME"]; userDefined { - c.Username = null.StringFrom(user) - } - - if password, passwordDefined := env["K6_PROMETHEUS_RW_PASSWORD"]; passwordDefined { - c.Password = null.StringFrom(password) - } - - if clientCertificate, certDefined := env["K6_PROMETHEUS_RW_CLIENT_CERTIFICATE"]; certDefined { - c.ClientCertificate = null.StringFrom(clientCertificate) - } - - if clientCertificateKey, certDefined := env["K6_PROMETHEUS_RW_CLIENT_CERTIFICATE_KEY"]; certDefined { - c.ClientCertificateKey = null.StringFrom(clientCertificateKey) - } - - if token, tokenDefined := env["K6_PROMETHEUS_RW_BEARER_TOKEN"]; tokenDefined { - c.BearerToken = null.StringFrom(token) - } - - envHeaders := envMap(env, "K6_PROMETHEUS_RW_HEADERS_") - for k, v := range envHeaders { - c.Headers[k] = v - } - - if headers, headersDefined := env["K6_PROMETHEUS_RW_HTTP_HEADERS"]; headersDefined { - for _, kvPair := range strings.Split(headers, ",") { - header := strings.Split(kvPair, ":") - if len(header) != 2 { - return c, fmt.Errorf("the provided header (%s) does not respect the expected format
:", kvPair) - } - c.Headers[header[0]] = header[1] - } - } - - if sigV4Region, sigV4RegionDefined := env["K6_PROMETHEUS_RW_SIGV4_REGION"]; sigV4RegionDefined { - c.SigV4Region = null.StringFrom(sigV4Region) - } - - if sigV4AccessKey, sigV4AccessKeyDefined := env["K6_PROMETHEUS_RW_SIGV4_ACCESS_KEY"]; sigV4AccessKeyDefined { - c.SigV4AccessKey = null.StringFrom(sigV4AccessKey) - } - - if sigV4SecretKey, sigV4SecretKeyDefined := env["K6_PROMETHEUS_RW_SIGV4_SECRET_KEY"]; sigV4SecretKeyDefined { - c.SigV4SecretKey = null.StringFrom(sigV4SecretKey) - } - - if b, err := envBool(env, "K6_PROMETHEUS_RW_TREND_AS_NATIVE_HISTOGRAM"); err != nil { - return c, err - } else if b.Valid { - c.TrendAsNativeHistogram = b - } - - if b, err := envBool(env, "K6_PROMETHEUS_RW_STALE_MARKERS"); err != nil { - return c, err - } else if b.Valid { - c.StaleMarkers = b - } - - if trendStats, trendStatsDefined := env["K6_PROMETHEUS_RW_TREND_STATS"]; trendStatsDefined { - c.TrendStats = strings.Split(trendStats, ",") - } - - return c, nil -} - -// parseJSON parses the supplied JSON into a Config. -func parseJSON(data json.RawMessage) (Config, error) { - var c Config - err := json.Unmarshal(data, &c) - return c, err -} - -// parseArg parses the supplied string of arguments into a Config. 
-func parseArg(text string) (Config, error) { - var c Config - opts := strings.Split(text, ",") - - for _, opt := range opts { - r := strings.SplitN(opt, "=", 2) - if len(r) != 2 { - return c, fmt.Errorf("couldn't parse argument %q as option", opt) - } - key, v := r[0], r[1] - switch key { - case "url": - c.ServerURL = null.StringFrom(v) - case "insecureSkipTLSVerify": - if err := c.InsecureSkipTLSVerify.UnmarshalText([]byte(v)); err != nil { - return c, fmt.Errorf("insecureSkipTLSVerify value must be true or false, not %q", v) - } - case "username": - c.Username = null.StringFrom(v) - case "password": - c.Password = null.StringFrom(v) - case "pushInterval": - if err := c.PushInterval.UnmarshalText([]byte(v)); err != nil { - return c, err - } - case "trendAsNativeHistogram": - if err := c.TrendAsNativeHistogram.UnmarshalText([]byte(v)); err != nil { - return c, fmt.Errorf("trendAsNativeHistogram value must be true or false, not %q", v) - } - - // TODO: add the support for trendStats - // strvals doesn't support the same format used by --summary-trend-stats - // using the comma as the separator, because it is already used for - // dividing the keys. 
- //nolint:gocritic - // - //if v, ok := params["trendStats"].(string); ok && len(v) > 0 { - //c.TrendStats = strings.Split(v, ",") - //} - - case "clientCertificate": - c.ClientCertificate = null.StringFrom(v) - case "clientCertificateKey": - c.ClientCertificateKey = null.StringFrom(v) - - default: - if !strings.HasPrefix(key, "headers.") { - return c, fmt.Errorf("%q is an unknown option's key", r[0]) - } - if c.Headers == nil { - c.Headers = make(map[string]string) - } - c.Headers[strings.TrimPrefix(key, "headers.")] = v - } - } - - return c, nil -} - -func isSigV4PartiallyConfigured(region, accessKey, secretKey null.String) bool { - hasRegion := region.Valid && len(strings.TrimSpace(region.String)) != 0 - hasAccessID := accessKey.Valid && len(strings.TrimSpace(accessKey.String)) != 0 - hasSecretAccessKey := secretKey.Valid && len(strings.TrimSpace(secretKey.String)) != 0 - // either they are all set, or all not set. False if partial - isComplete := (hasRegion && hasAccessID && hasSecretAccessKey) || (!hasRegion && !hasAccessID && !hasSecretAccessKey) - return !isComplete -} diff --git a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/prometheus.go b/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/prometheus.go deleted file mode 100644 index 8c5d1d52715..00000000000 --- a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/prometheus.go +++ /dev/null @@ -1,52 +0,0 @@ -package remotewrite - -import ( - "sort" - - prompb "buf.build/gen/go/prometheus/prometheus/protocolbuffers/go" - "github.com/mstoykov/atlas" - "go.k6.io/k6/metrics" -) - -const namelbl = "__name__" - -// MapTagSet converts a k6 tag set into -// the equivalent set of Labels as expected from the -// Prometheus' data model. 
-func MapTagSet(t *metrics.TagSet) []*prompb.Label { - n := (*atlas.Node)(t) - if n.Len() < 1 { - return nil - } - labels := make([]*prompb.Label, 0, n.Len()) - for !n.IsRoot() { - prev, key, value := n.Data() - n = prev - if key == "" || value == "" { - continue - } - labels = append(labels, &prompb.Label{Name: key, Value: value}) - } - return labels -} - -// MapSeries converts a k6 time series into -// the equivalent set of Labels (name+tags) as expected from the -// Prometheus' data model. -// -// The labels are lexicographic sorted as required -// from the Remote write's specification. -func MapSeries(series metrics.TimeSeries, suffix string) []*prompb.Label { - v := defaultMetricPrefix + series.Metric.Name - if suffix != "" { - v += "_" + suffix - } - lbls := append(MapTagSet(series.Tags), &prompb.Label{ - Name: namelbl, - Value: v, - }) - sort.Slice(lbls, func(i int, j int) bool { - return lbls[i].Name < lbls[j].Name - }) - return lbls -} diff --git a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/remotewrite.go b/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/remotewrite.go deleted file mode 100644 index 9182ecc5dbc..00000000000 --- a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/remotewrite.go +++ /dev/null @@ -1,406 +0,0 @@ -// Package remotewrite is a k6 output that sends metrics to a Prometheus remote write endpoint. -package remotewrite - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/grafana/xk6-output-prometheus-remote/pkg/remote" - "github.com/grafana/xk6-output-prometheus-remote/pkg/stale" - - "go.k6.io/k6/metrics" - "go.k6.io/k6/output" - - prompb "buf.build/gen/go/prometheus/prometheus/protocolbuffers/go" - "github.com/sirupsen/logrus" -) - -var _ output.Output = new(Output) - -// Output is a k6 output that sends metrics to a Prometheus remote write endpoint. 
-type Output struct { - output.SampleBuffer - - config Config - logger logrus.FieldLogger - now func() time.Time - periodicFlusher *output.PeriodicFlusher - tsdb map[metrics.TimeSeries]*seriesWithMeasure - trendStatsResolver map[string]func(*metrics.TrendSink) float64 - - // TODO: copy the prometheus/remote.WriteClient interface and depend on it - client *remote.WriteClient -} - -// New creates a new Output instance. -func New(params output.Params) (*Output, error) { - logger := params.Logger.WithFields(logrus.Fields{"output": "Prometheus remote write"}) - - config, err := GetConsolidatedConfig(params.JSONConfig, params.Environment, params.ConfigArgument) - if err != nil { - return nil, err - } - - clientConfig, err := config.RemoteConfig() - if err != nil { - return nil, err - } - - wc, err := remote.NewWriteClient(config.ServerURL.String, clientConfig) - if err != nil { - return nil, fmt.Errorf("failed to initialize the Prometheus remote write client: %w", err) - } - - o := &Output{ - client: wc, - config: config, - // TODO: consider to do this function millisecond-based - // so we don't need to truncate all the time we invoke it. - // Before we should analyze if in some cases is it useful to have it in ns. - now: time.Now, - logger: logger, - tsdb: make(map[metrics.TimeSeries]*seriesWithMeasure), - } - - if len(config.TrendStats) > 0 { - if err := o.setTrendStatsResolver(config.TrendStats); err != nil { - return nil, err - } - } - return o, nil -} - -// Description returns a short human-readable description of the output. -func (o *Output) Description() string { - return fmt.Sprintf("Prometheus remote write (%s)", o.config.ServerURL.String) -} - -// Start initializes the output. 
-func (o *Output) Start() error { - d := o.config.PushInterval.TimeDuration() - periodicFlusher, err := output.NewPeriodicFlusher(d, o.flush) - if err != nil { - return err - } - o.periodicFlusher = periodicFlusher - o.logger.WithField("flushtime", d).Debug("Output initialized") - return nil -} - -// Stop stops the output. -func (o *Output) Stop() error { - o.logger.Debug("Stopping the output") - defer o.logger.Debug("Output stopped") - o.periodicFlusher.Stop() - - if !o.config.StaleMarkers.Bool { - return nil - } - staleMarkers := o.staleMarkers() - if len(staleMarkers) < 1 { - o.logger.Debug("No time series to mark as stale") - return nil - } - o.logger.WithField("staleMarkers", len(staleMarkers)).Debug("Marking time series as stale") - - err := o.client.Store(context.Background(), staleMarkers) - if err != nil { - return fmt.Errorf("marking time series as stale failed: %w", err) - } - return nil -} - -// staleMarkers maps all the seen time series with a stale marker. -func (o *Output) staleMarkers() []*prompb.TimeSeries { - // Add 1ms so in the extreme case that the time frame - // between the last and the next flush operation is under-millisecond, - // we can avoid the sample being seen as a duplicate, - // if we force it in the future. - // It is essential because if it overlaps, the remote write discards the last sample, - // so the stale marker and the metric will remain active for the next 5 min - // as the default logic without stale markers. - timestamp := o.now(). - Truncate(time.Millisecond).Add(1 * time.Millisecond).UnixMilli() - - staleMarkers := make([]*prompb.TimeSeries, 0, len(o.tsdb)) - for _, swm := range o.tsdb { - series := swm.MapPrompb() - // series' length is expected to be equal to 1 for most of the cases - // the unique exception where more than 1 is expected is when - // trend stats have been configured with multiple values. 
- for _, s := range series { - if len(s.Samples) < 1 { - if len(s.Histograms) < 1 { - panic("data integrity check: samples and native histograms" + - " can't be empty at the same time") - } - s.Samples = append(s.Samples, &prompb.Sample{}) - } - - s.Samples[0].Value = stale.Marker - s.Samples[0].Timestamp = timestamp - } - staleMarkers = append(staleMarkers, series...) - } - return staleMarkers -} - -// setTrendStatsResolver sets the resolver for the Trend stats. -// -// TODO: refactor, the code can be improved -func (o *Output) setTrendStatsResolver(trendStats []string) error { - trendStatsCopy := make([]string, 0, len(trendStats)) - hasSum := false - // copy excluding sum - for _, stat := range trendStats { - if stat == "sum" { - hasSum = true - continue - } - trendStatsCopy = append(trendStatsCopy, stat) - } - resolvers, err := metrics.GetResolversForTrendColumns(trendStatsCopy) - if err != nil { - return err - } - // sum is not supported from GetResolversForTrendColumns - // so if it has been requested - // it adds it specifically - if hasSum { - resolvers["sum"] = func(t *metrics.TrendSink) float64 { - return t.Total() - } - } - o.trendStatsResolver = make(TrendStatsResolver, len(resolvers)) - for stat, fn := range resolvers { - statKey := stat - - // the config passes percentiles with p(x) form, for example p(95), - // but the mapping generates series name in the form p95. - // - // TODO: maybe decoupling mapping from the stat resolver keys? 
- if strings.HasPrefix(statKey, "p(") { - statKey = stat[2 : len(statKey)-1] // trim the parenthesis - statKey = strings.ReplaceAll(statKey, ".", "") // remove dots, p(0.95) => p095 - statKey = "p" + statKey - } - o.trendStatsResolver[statKey] = fn - } - return nil -} - -func (o *Output) flush() { - var ( - start = time.Now() - nts int - ) - - defer func() { - d := time.Since(start) - okmsg := "Successful flushed time series to remote write endpoint" - if d > time.Duration(o.config.PushInterval.Duration) { - // There is no intermediary storage so warn if writing to remote write endpoint becomes too slow - o.logger.WithField("nts", nts). - Warnf("%s but it took %s while flush period is %s. Some samples may be dropped.", - okmsg, d.String(), o.config.PushInterval.String()) - } else { - o.logger.WithField("nts", nts).WithField("took", d).Debug(okmsg) - } - }() - - samplesContainers := o.GetBufferedSamples() - if len(samplesContainers) < 1 { - o.logger.Debug("no buffered samples, skip the flushing operation") - return - } - - // Remote write endpoint accepts TimeSeries structure defined in gRPC. It must: - // a) contain Labels array - // b) have a __name__ label: without it, metric might be unquerable or even rejected - // as a metric without a name. This behaviour depends on underlying storage used. - // c) not have duplicate timestamps within 1 timeseries, see https://github.com/prometheus/prometheus/issues/9210 - // Prometheus write handler processes only some fields as of now, so here we'll add only them. 
- - promTimeSeries := o.convertToPbSeries(samplesContainers) - nts = len(promTimeSeries) - o.logger.WithField("nts", nts).Debug("Converted samples to Prometheus TimeSeries") - - if err := o.client.Store(context.Background(), promTimeSeries); err != nil { - o.logger.WithError(err).Error("Failed to send the time series data to the endpoint") - return - } -} - -func (o *Output) convertToPbSeries(samplesContainers []metrics.SampleContainer) []*prompb.TimeSeries { - // The seen map is required because the samples containers - // could have several samples for the same time series - // in this way, we can aggregate and flush them in a unique value - // without overloading the remote write endpoint. - // - // It is also essential because the core generates timestamps - // with a higher precision (ns) than Prometheus (ms), - // so we need to aggregate all the samples in the same time bucket. - // More context can be found in the issue - // https://github.com/grafana/xk6-output-prometheus-remote/issues/11 - seen := make(map[metrics.TimeSeries]struct{}) - - for _, samplesContainer := range samplesContainers { - samples := samplesContainer.GetSamples() - - for _, sample := range samples { - truncTime := sample.Time.Truncate(time.Millisecond) - swm, ok := o.tsdb[sample.TimeSeries] - if !ok { - // TODO: encapsulate the trend arguments into a Trend Mapping factory - swm = newSeriesWithMeasure(sample.TimeSeries, o.config.TrendAsNativeHistogram.Bool, o.trendStatsResolver) - swm.Latest = truncTime - o.tsdb[sample.TimeSeries] = swm - seen[sample.TimeSeries] = struct{}{} - } else { //nolint:gocritic - // FIXME: remove the gocritic linter inhibition as soon as the rest of the todo are done - // save as a seen item only when the samples have a time greater than - // the previous saved, otherwise some implementations - // could see it as a duplicate and generate warnings (e.g. 
Mimir) - if truncTime.After(swm.Latest) { - swm.Latest = truncTime - seen[sample.TimeSeries] = struct{}{} - } - - // If current == previous: - // the current received time before being truncated had a higher precision. - // It's fine to aggregate them but we avoid to add to the seen map because: - // - in the case it is a new flush operation then we avoid delivering - // for not generating duplicates - // - in the case it is in the same operation but across sample containers - // then the time series should be already on the seen map and we can skip - // to re-add it. - - // If current < previous: - // - in the case current is a new flush operation, it shouldn't happen, - // for this reason, we can avoid creating a dedicated logic. - // TODO: We should evaluate if it would be better to have a defensive condition - // for handling it, logging a warning or returning an error - // and avoid aggregating the value. - // - in the case current is in the same operation but across sample containers - // it's fine to aggregate - // but same as for the equal condition it can rely on the previous seen value. - } - swm.Measure.Add(sample) - } - } - - pbseries := make([]*prompb.TimeSeries, 0, len(seen)) - for s := range seen { - pbseries = append(pbseries, o.tsdb[s].MapPrompb()...) - } - return pbseries -} - -type seriesWithMeasure struct { - metrics.TimeSeries - Measure metrics.Sink - - // Latest tracks the latest time - // when the measure has been updated - // - // TODO: the logic for this value should stay directly - // in a method in struct - Latest time.Time - - // TODO: maybe add some caching for the mapping? 
-} - -// TODO: add unit tests -func (swm seriesWithMeasure) MapPrompb() []*prompb.TimeSeries { - var newts []*prompb.TimeSeries - - mapMonoSeries := func(s metrics.TimeSeries, suffix string, t time.Time) prompb.TimeSeries { - return prompb.TimeSeries{ - Labels: MapSeries(s, suffix), - Samples: []*prompb.Sample{ - {Timestamp: t.UnixMilli()}, - }, - } - } - - //nolint:forcetypeassert - switch swm.Metric.Type { - case metrics.Counter: - ts := mapMonoSeries(swm.TimeSeries, "total", swm.Latest) - ts.Samples[0].Value = swm.Measure.(*metrics.CounterSink).Value - newts = []*prompb.TimeSeries{&ts} - - case metrics.Gauge: - ts := mapMonoSeries(swm.TimeSeries, "", swm.Latest) - ts.Samples[0].Value = swm.Measure.(*metrics.GaugeSink).Value - newts = []*prompb.TimeSeries{&ts} - - case metrics.Rate: - ts := mapMonoSeries(swm.TimeSeries, "rate", swm.Latest) - // pass zero duration here because time is useless for formatting rate - rateVals := swm.Measure.(*metrics.RateSink).Format(time.Duration(0)) - ts.Samples[0].Value = rateVals["rate"] - newts = []*prompb.TimeSeries{&ts} - - case metrics.Trend: - // TODO: - // - Add a PrompbMapSinker interface - // and implements it on all the sinks "extending" them. - // - Call directly MapPrompb on Measure without any type assertion. 
- trend, ok := swm.Measure.(prompbMapper) - if !ok { - panic("Measure for Trend types must implement MapPromPb") - } - newts = trend.MapPrompb(swm.TimeSeries, swm.Latest) - - default: - panic( - fmt.Sprintf( - "the output reached an unrecoverable state; unable to recognize processed metric %s's type `%s`", - swm.Metric.Name, - swm.Metric.Type, - ), - ) - } - return newts -} - -type prompbMapper interface { - MapPrompb(series metrics.TimeSeries, t time.Time) []*prompb.TimeSeries -} - -func newSeriesWithMeasure( - series metrics.TimeSeries, - trendAsNativeHistogram bool, - tsr TrendStatsResolver, -) *seriesWithMeasure { - var sink metrics.Sink - switch series.Metric.Type { - case metrics.Counter: - sink = &metrics.CounterSink{} - case metrics.Gauge: - sink = &metrics.GaugeSink{} - case metrics.Trend: - // TODO: refactor encapsulating in a factory method - if trendAsNativeHistogram { - sink = newNativeHistogramSink(series.Metric) - } else { - var err error - sink, err = newExtendedTrendSink(tsr) - if err != nil { - // the resolver must be already validated - panic(err) - } - } - case metrics.Rate: - sink = &metrics.RateSink{} - default: - panic(fmt.Sprintf("metric type %q unsupported", series.Metric.Type.String())) - } - return &seriesWithMeasure{ - TimeSeries: series, - Measure: sink, - } -} diff --git a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/trend.go b/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/trend.go deleted file mode 100644 index 49b2ae20e38..00000000000 --- a/vendor/github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite/trend.go +++ /dev/null @@ -1,250 +0,0 @@ -package remotewrite - -import ( - "fmt" - "sort" - "time" - - prompb "buf.build/gen/go/prometheus/prometheus/protocolbuffers/go" - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" - "go.k6.io/k6/metrics" -) - -// TrendStatsResolver is a map of trend stats name and their relative resolver 
function -type TrendStatsResolver map[string]func(*metrics.TrendSink) float64 - -type extendedTrendSink struct { - *metrics.TrendSink - - trendStats map[string]func(*metrics.TrendSink) float64 -} - -func newExtendedTrendSink(tsr TrendStatsResolver) (*extendedTrendSink, error) { - if len(tsr) < 1 { - return nil, fmt.Errorf("trend stats resolver is empty") - } - return &extendedTrendSink{ - TrendSink: metrics.NewTrendSink(), - trendStats: tsr, - }, nil -} - -// MapPrompb converts a k6 time series and its relative -// Sink into the equivalent TimeSeries model as defined from -// the Remote write specification. -func (sink *extendedTrendSink) MapPrompb(series metrics.TimeSeries, t time.Time) []*prompb.TimeSeries { - // Prometheus metric system does not support Trend so this mapping will - // store a counter for the number of reported values and gauges to keep - // track of aggregated values. Also store a sum of the values to allow - // the calculation of moving averages. - // TODO: when Prometheus implements support for sparse histograms, re-visit this implementation - - tg := &trendAsGauges{ - series: make([]*prompb.TimeSeries, 0, len(sink.trendStats)), - // TODO: should we add the base unit suffix? - // It could depends from the decision for other metric types - // Does k6_http_req_duration_seconds_count make sense? - labels: MapSeries(series, ""), - timestamp: t.UnixMilli(), - } - tg.CacheNameIndex() - - for stat, statfn := range sink.trendStats { - tg.Append(stat, adaptUnit(series.Metric.Contains, statfn(sink.TrendSink))) - } - return tg.series -} - -type trendAsGauges struct { - // series is the slice of the converted TimeSeries. - series []*prompb.TimeSeries - - // labels are the shared labels between all the Gauges. - labels []*prompb.Label - - // timestamp is the shared timestamp in ms between all the Gauges. - timestamp int64 - - // ixname is the slice's index - // of the __name__ Label item. 
- // - // 16 bytes should be enough for the max length - // an higher value will probably generate - // serious issues in other places. - ixname uint16 -} - -func (tg *trendAsGauges) Append(suffix string, v float64) { - ts := &prompb.TimeSeries{ - Labels: make([]*prompb.Label, len(tg.labels)), - Samples: make([]*prompb.Sample, 1), - } - for i := 0; i < len(tg.labels); i++ { - ts.Labels[i] = &prompb.Label{ - Name: tg.labels[i].Name, - Value: tg.labels[i].Value, - } - } - ts.Labels[tg.ixname].Value += "_" + suffix - - ts.Samples[0] = &prompb.Sample{ - Timestamp: tg.timestamp, - Value: v, - } - tg.series = append(tg.series, ts) -} - -// CacheNameIndex finds the __name__ label's index -// if it is different from the most common expected case -// then it caches the value. -// The labels slice is expected to be sorted. -func (tg *trendAsGauges) CacheNameIndex() { - if tg.labels[0].Name == namelbl { - // ixname is expected to be the first in most of the cases - // the default value is already 0 - return - } - - // in the case __name__ is not the first - // then search for its position - - i := sort.Search(len(tg.labels), func(i int) bool { - return tg.labels[i].Name == namelbl - }) - - if i < len(tg.labels) && tg.labels[i].Name == namelbl { - tg.ixname = uint16(i) //nolint:gosec - } -} - -type nativeHistogramSink struct { - H prometheus.Histogram -} - -func newNativeHistogramSink(m *metrics.Metric) *nativeHistogramSink { - return &nativeHistogramSink{ - H: prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: m.Name, - // 1.1 is the starting value suggested by Prometheus' - // It sounds good considering the general purpose - // it have to address. - // In the future, we could consider to add more tuning - // if it will be required. - NativeHistogramBucketFactor: 1.1, - }), - } -} - -func (sink *nativeHistogramSink) Add(s metrics.Sample) { - // The Prometheus' convention is to use seconds - // as time unit. 
- // - // It isn't a requirement but having the current factor fixed to 1.1 then - // have seconds is beneficial for having a better resolution. - // - // The assumption is that an higher precision is required - // in case of under-second and more relaxed in case of higher values. - // If the Value type is not defined any assumption can be done - // because the Sample's Value could contains any unit. - sink.H.Observe(adaptUnit(s.Metric.Contains, s.Value)) -} - -// TODO: create a smaller Sink interface for this Output. -// Sink with only Add and MapPrompb methods should be enough. -// One method interfaces could be even better, to be checked. - -// P implements metrics.Sink. -func (*nativeHistogramSink) P(_ float64) float64 { - panic("Native Histogram Sink has no support of percentile (P)") -} - -// Format implements metrics.Sink. -func (*nativeHistogramSink) Format(_ time.Duration) map[string]float64 { - panic("Native Histogram Sink has no support of formatting (Format)") -} - -// IsEmpty implements metrics.Sink. -func (*nativeHistogramSink) IsEmpty() bool { - panic("Native Histogram Sink has no support of emptiness check (IsEmpty)") -} - -// Drain implements metrics.Sink. -func (*nativeHistogramSink) Drain() ([]byte, error) { - panic("Native Histogram Sink has no support of draining") -} - -// Merge implements metrics.Sink. -func (*nativeHistogramSink) Merge(_ []byte) error { - panic("Native Histogram Sink has no support of merging") -} - -// MapPrompb maps the Trend type to the experimental Native Histogram. 
-func (sink *nativeHistogramSink) MapPrompb(series metrics.TimeSeries, t time.Time) []*prompb.TimeSeries { - suffix := baseUnit(series.Metric.Contains) - labels := MapSeries(series, suffix) - timestamp := t.UnixMilli() - - return []*prompb.TimeSeries{ - { - Labels: labels, - Histograms: []*prompb.Histogram{ - histogramToHistogramProto(timestamp, sink.H), - }, - }, - } -} - -func histogramToHistogramProto(timestamp int64, h prometheus.Histogram) *prompb.Histogram { - // TODO: research more if a better way is possible. - metric := &dto.Metric{} - if err := h.Write(metric); err != nil { - panic(fmt.Errorf("failed to convert Native Histogram to the related Protobuf: %w", err)) - } - hmetric := metric.Histogram - - return &prompb.Histogram{ - Count: &prompb.Histogram_CountInt{CountInt: *hmetric.SampleCount}, - Sum: *hmetric.SampleSum, - Schema: *hmetric.Schema, - ZeroThreshold: *hmetric.ZeroThreshold, - ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: *hmetric.ZeroCount}, - NegativeSpans: toBucketSpanProto(hmetric.NegativeSpan), - NegativeDeltas: hmetric.NegativeDelta, - PositiveSpans: toBucketSpanProto(hmetric.PositiveSpan), - PositiveDeltas: hmetric.PositiveDelta, - Timestamp: timestamp, - } -} - -func toBucketSpanProto(s []*dto.BucketSpan) []*prompb.BucketSpan { - spans := make([]*prompb.BucketSpan, len(s)) - for i := 0; i < len(s); i++ { - spans[i] = &prompb.BucketSpan{Offset: *s[i].Offset, Length: *s[i].Length} - } - return spans -} - -func baseUnit(vt metrics.ValueType) string { - switch vt { - case metrics.Time: - return "seconds" - case metrics.Data: - return "bytes" - default: - return "" - } -} - -// adaptUnit converts the generated value into the expected base unit -// as requested by the Prometheus convention. -// -// Time: converted to seconds from milliseconds. -// Data: k6 emits it in Bytes so it already fine. -// Other: use the submitted unit. 
-func adaptUnit(vt metrics.ValueType, v float64) float64 { - if vt == metrics.Time { - return v / 1000 - } - return v -} diff --git a/vendor/modules.txt b/vendor/modules.txt index ede54c12a9a..2a897ea8afd 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -160,6 +160,9 @@ github.com/golang/protobuf/proto github.com/golang/protobuf/ptypes/any github.com/golang/protobuf/ptypes/struct github.com/golang/protobuf/ptypes/wrappers +# github.com/golang/snappy v0.0.4 +## explicit +github.com/golang/snappy # github.com/google/pprof v0.0.0-20230728192033-2ba5b33183c6 ## explicit; go 1.19 github.com/google/pprof/profile @@ -188,7 +191,6 @@ github.com/grafana/xk6-output-opentelemetry/pkg/opentelemetry # github.com/grafana/xk6-output-prometheus-remote v0.5.1 ## explicit; go 1.20 github.com/grafana/xk6-output-prometheus-remote/pkg/remote -github.com/grafana/xk6-output-prometheus-remote/pkg/remotewrite github.com/grafana/xk6-output-prometheus-remote/pkg/sigv4 github.com/grafana/xk6-output-prometheus-remote/pkg/stale # github.com/grafana/xk6-redis v0.3.3